require(tidyverse)
require(reshape2)
require(RColorBrewer)
require(ComplexHeatmap)
require(dendextend)
require(circlize)
require(viridis)
require(naniar)
require(pals)
require(PermCor)
require(vegan)
library(gridExtra)
library(data.table)
lmp <- function (modelobject) {
if (class(modelobject) != "lm") stop("Not an object of class 'lm' ")
f <- summary(modelobject)$fstatistic
p <- pf(f[1],f[2],f[3],lower.tail=F)
attributes(p) <- NULL
return(p)
}
source("/Users/emifaure/Documents/TONGA/StageMilo/PetB_Corr_Heatmaps_RDA/scoresRDA.R")
########## Load metadata ############
meta <- read.table(file = "/Users/emifaure/Documents/ACE/MetaData/ACE_metadata/Metadata_For_Submission/meta_CTD_ForSubmission.csv",
header=TRUE, sep = ";", dec = ".", stringsAsFactors = FALSE)
meta <- meta %>%
mutate_if(.predicate = is.character, .funs = as.factor)
# Load list of metagenomes
metagenomes <- read.table("/Users/emifaure/Documents/ACE/Metagenomics/ACEsamples_With_Ace_seq_name.tsv", header = TRUE)
# Load Nickel dataset
Nickel=read.table("/Users/emifaure/Documents/ACE/NolwennCollab/NickelData.txt", sep="\t", header=T, dec=",")
meta_nickel = meta %>% inner_join(Nickel, by=c("TM_station_number","Depth_m")) %>%
filter(!is.na(Ni_60_58_D_DELTA_BOTTLE))
# 85 ACE_seq_Name have a nickel values, how many in out sequenced metagenomes ?
meta_nickel = meta_nickel[which(meta_nickel$ACE_seq_name %in% metagenomes$ace_seq_name),]
# we have 48 metagenomes that can be associated with an isotopic value
########## Load Metagnomics data ############
# CDHit clusters of interest were selected according to their functional annotations:
# - Annotations containing one of the words "urease", "NiFe", ["superoxide"+"dismutase"] in eggNOG annotation file
# - Annotations as KEGG ID corresponding to an urease, NiFe hydrgenase or superoxide dismutase
# The relative transformed RLE data (makes sense to sum them, reflect % of mapped reads in sample):
GeneMat = read.table("/Users/emifaure/Documents/ACE/NolwennCollab/Post_Review/GM_CDHit_relative-trans_MetalloEnzymes.tsv", sep="\t", header=T)
# The sample names are not in ACE_Seq format, we make the conversion :
for (i in 2:ncol(GeneMat)){
names(GeneMat)[i] <- sub("_SORTED", "", names(GeneMat)[i])
names(GeneMat)[i] <- metagenomes$ace_seq_name[which(metagenomes$sample==names(GeneMat)[i])]
}
row.names(GeneMat)=GeneMat$CDHit_ID
GeneMat_Nickel = GeneMat[,names(GeneMat) %in% meta_nickel$ACE_seq_name]
GeneMat_Nickel=as.data.frame(t(GeneMat_Nickel))
# Results from the ACE metagenomics catalog showed that abundance patterns can be extremely different across size fractions for one AGC, and it should make more sense to divide between A (>3 micrometers, attached and eukaryotes) and B+C (0.2-3 and 0.2-40 micrometers, which are dominated by prokaryotes).
meta_nickel_FL = meta_nickel[-which(meta_nickel$Size_fraction==">3 µm"),]
meta_nickel_ATT = meta_nickel[which(meta_nickel$Size_fraction==">3 µm"),]
GeneMat_Nickel_FL = GeneMat_Nickel[row.names(GeneMat_Nickel) %in% meta_nickel_FL$ACE_seq_name,]
GeneMat_Nickel_FL=GeneMat_Nickel_FL[match(meta_nickel_FL$ACE_seq_name,row.names(GeneMat_Nickel_FL)),]
#Remove columns of only 0
GeneMat_Nickel_FL = GeneMat_Nickel_FL[,-which(colSums(GeneMat_Nickel_FL)==0)]
GeneMat_Nickel_ATT = GeneMat_Nickel[row.names(GeneMat_Nickel) %in% meta_nickel_ATT$ACE_seq_name,]
GeneMat_Nickel_ATT=GeneMat_Nickel_ATT[match(meta_nickel_ATT$ACE_seq_name,row.names(GeneMat_Nickel_ATT)),]
#Remove columns of only 0
GeneMat_Nickel_ATT = GeneMat_Nickel_ATT[,-which(colSums(GeneMat_Nickel_ATT)==0)]
# Load functional annotations for the CD-Hit clusters
AnnotFull=read.table("/Users/emifaure/Documents/ACE/NolwennCollab/Annotations_CDHit_Metallo.tsv", sep="\t", header=F,fill=T, quote = '', na.strings =c('','-'))
names(AnnotFull)=c("Gene_ID","AGC_ID","AGC_Rep","AGC_Size","AGC_Cat","Singl_Cat","CDHit_Rep","Domain","KEGG","seed_ortholog","eggNOG_OGs","narr_OG_name","narr_OG_cat","narr_OG_desc","best_OG_name","best_OG_cat","best_OG_desc","Preferred_name","CAZy","BiGG_Reaction","PFAMs")
AnnotTransport=read.table("/Users/emifaure/Documents/ACE/NolwennCollab/Annotations_CDHit_nickeltransport.tsv", sep="\t", header=F,fill=T, quote = '', na.strings =c('','-'))
names(AnnotTransport)=c("Gene_ID","AGC_ID","AGC_Rep","AGC_Size","AGC_Cat","Singl_Cat","CDHit_Rep","Domain","KEGG","seed_ortholog","eggNOG_OGs","narr_OG_name","narr_OG_cat","narr_OG_desc","best_OG_name","best_OG_cat","best_OG_desc","Preferred_name","CAZy","BiGG_Reaction","PFAMs")
# First we will agreggate the CD-Hit clusters by broad functional type
Func_simplify <- rep("Unclassified",nrow(AnnotFull))
Func_simplify[grepl("urease", do.call(paste0, AnnotFull), ignore.case=TRUE)]="Urease"
Func_simplify[grepl("ure", do.call(paste0, AnnotFull), ignore.case=TRUE)]="Urease"
Func_simplify[grepl("Ure", do.call(paste0, AnnotFull), ignore.case=TRUE)]="Urease"
Func_simplify[grepl("K03190", do.call(paste0, AnnotFull), ignore.case=TRUE)]="Urease"
Func_simplify[grepl("K03189", do.call(paste0, AnnotFull), ignore.case=TRUE)]="Urease"
Func_simplify[grepl("K03188", do.call(paste0, AnnotFull), ignore.case=TRUE)]="Urease"
Func_simplify[grepl("NiFe", do.call(paste0, AnnotFull), ignore.case=TRUE)]="NiFe Hydrogenase"
Func_simplify[grepl("hydrogenase", do.call(paste0, AnnotFull), ignore.case=TRUE)]="NiFe Hydrogenase"
Func_simplify[grepl("superoxide", do.call(paste0, AnnotFull), ignore.case=TRUE)]="SOD"
Func_simplify[grepl("sod", do.call(paste0, AnnotFull), ignore.case=TRUE)]="SOD"
Func_simplify[grepl("Sod_", do.call(paste0, AnnotFull), ignore.case=TRUE)]="SOD"
Func_simplify[grepl("K04565", do.call(paste0, AnnotFull), ignore.case=TRUE)]="SOD"
AnnotFull$FuncSimplify <- Func_simplify
Enzyme_IDs=unique(AnnotFull$FuncSimplify)[!is.na(unique(AnnotFull$FuncSimplify))]
#Initiate matrix for FL:
Enzyme_matrix_FL=GeneMat_Nickel_FL[,1:length(Enzyme_IDs)]
#Fill matrix
for (i in c(1:length(Enzyme_IDs))) {
Enzyme_matrix_FL[,i]=rowSums(GeneMat_Nickel_FL[,which(names(GeneMat_Nickel_FL) %in% AnnotFull[which(AnnotFull$FuncSimplify==Enzyme_IDs[i]),"Gene_ID"]),drop=F])
names(Enzyme_matrix_FL)[i]=Enzyme_IDs[i]
}
#Initiate matrix for ATT:
Enzyme_matrix_ATT=GeneMat_Nickel_ATT[,1:length(Enzyme_IDs)]
#Fill matrix
for (i in c(1:length(Enzyme_IDs))) {
Enzyme_matrix_ATT[,i]=rowSums(GeneMat_Nickel_ATT[,which(names(GeneMat_Nickel_ATT) %in% AnnotFull[which(AnnotFull$FuncSimplify==Enzyme_IDs[i]),"Gene_ID"]),drop=F])
names(Enzyme_matrix_ATT)[i]=Enzyme_IDs[i]
}
# Add the nickel data and metadata
EnzymeFL_Ni = merge(Enzyme_matrix_FL,meta_nickel_FL,by.x="row.names",by.y="ACE_seq_name")
EnzymeATT_Ni = merge(Enzyme_matrix_ATT,meta_nickel_ATT,by.x="row.names",by.y="ACE_seq_name")
# Plots
# FL SF :
EnzymeFL_Ni_plot <- EnzymeFL_Ni %>% select(SOD,Urease,`NiFe Hydrogenase`,Ni_60_58_D_DELTA_BOTTLE,MertzGlacier) %>%
pivot_longer(-c(Ni_60_58_D_DELTA_BOTTLE,MertzGlacier),names_to = "Enzyme",values_to = "Abundance") %>%
mutate(Enzyme = fct_relevel(Enzyme,
"NiFe Hydrogenase", "SOD", "Urease"))
ggplot(EnzymeFL_Ni_plot) +
geom_smooth(aes(x=Ni_60_58_D_DELTA_BOTTLE,y=Abundance*100,col=Enzyme),method="lm",alpha=0.2) +
geom_point(aes(x=Ni_60_58_D_DELTA_BOTTLE,y=Abundance*100,col=Enzyme,shape=MertzGlacier),size=3) +
scale_color_brewer(palette = "Dark2") +
theme_bw() +
labs(y="Metagenomic relative abundance (% of coverage)", x=expression(delta^60 * Ni ~ "(‰)"), shape="Mertz Glacier") +
theme(text = element_text(size = 16)) +
facet_wrap(~Enzyme,scales = "free",nrow = 1)
testFL <- data.frame(Enzyme=c("SOD","Urease","NiFe Hydrogenase"),pval=rep(0,3), coef=rep(0,3), R2=rep(0,3))
for(i in c(1:3)) {
testFL$pval[i] <- perm_test(EnzymeFL_Ni[,testFL$Enzyme[i]],EnzymeFL_Ni$Ni_60_58_D_DELTA_BOTTLE,method = "Spearman", alternative = "two.sided", B=1000)$p.value
res=lm(EnzymeFL_Ni[,testFL$Enzyme[i]]~EnzymeFL_Ni$Ni_60_58_D_DELTA_BOTTLE)
testFL$R2[i]=summary(res)$adj.r.squared
testFL$coef[i]=res$coefficients[2]
}
testFL$pvalspear.adj=p.adjust(testFL$pval, method="BH")
testFL
## Enzyme pval coef R2 pvalspear.adj
## 1 SOD 0.313 9.143826e-05 0.045527667 0.313
## 2 Urease 0.192 -3.051448e-04 -0.001883718 0.288
## 3 NiFe Hydrogenase 0.130 -6.195455e-05 -0.016610441 0.288
# Try without Mertz :
EnzymeFL_Ni_nomertz <- EnzymeFL_Ni[-which(EnzymeFL_Ni$MertzGlacier==TRUE),]
testFL <- data.frame(Enzyme=c("SOD","Urease","NiFe Hydrogenase"),pval=rep(0,3))
for(i in c(1:3)) {
testFL$pval[i] <- perm_test(EnzymeFL_Ni_nomertz[,testFL$Enzyme[i]],EnzymeFL_Ni_nomertz$Ni_60_58_D_DELTA_BOTTLE,method = "Spearman", alternative = "two.sided", B=1000)$p.value
}
testFL$pvalspear.adj=p.adjust(testFL$pval, method="BH")
testFL
## Enzyme pval pvalspear.adj
## 1 SOD 0.857 0.857
## 2 Urease 0.079 0.237
## 3 NiFe Hydrogenase 0.422 0.633
# ATT SF :
EnzymeATT_Ni_plot <- EnzymeATT_Ni %>% select(SOD,Urease,`NiFe Hydrogenase`,Ni_60_58_D_DELTA_BOTTLE,MertzGlacier) %>%
pivot_longer(-c(Ni_60_58_D_DELTA_BOTTLE,MertzGlacier),names_to = "Enzyme",values_to = "Abundance") %>%
mutate(Enzyme = fct_relevel(Enzyme,
"NiFe Hydrogenase", "SOD", "Urease"))
ggplot(EnzymeATT_Ni_plot) +
geom_smooth(aes(x=Ni_60_58_D_DELTA_BOTTLE,y=Abundance*100,col=Enzyme),method="lm",alpha=0.2) +
geom_point(aes(x=Ni_60_58_D_DELTA_BOTTLE,y=Abundance*100,col=Enzyme,shape=MertzGlacier),size=3) +
scale_color_brewer(palette = "Dark2") +
theme_bw() +
labs(y="Metagenomic relative abundance (% of coverage)", x=expression(delta^60 * Ni ~ "(‰)"), shape="Mertz Glacier") +
theme(text = element_text(size = 16)) +
facet_wrap(~Enzyme,scales = "free",nrow=1)
#ggsave("Documents/ACE/NolwennCollab/Post_Review/Figure3_PanelB_PostReview_relab_V2.pdf",width = 40, height = 15, units = "cm", device = cairo_pdf)
testATT <- data.frame(Enzyme=c("SOD","Urease","NiFe Hydrogenase"),pval=rep(0,3), coef=rep(0,3), R2=rep(0,3))
for(i in c(1:3)) {
testATT$pval[i] <- perm_test(EnzymeATT_Ni[,testATT$Enzyme[i]],EnzymeATT_Ni$Ni_60_58_D_DELTA_BOTTLE,method = "Spearman", alternative = "two.sided", B=1000)$p.value
res=lm(EnzymeATT_Ni[,testATT$Enzyme[i]]~EnzymeATT_Ni$Ni_60_58_D_DELTA_BOTTLE)
testATT$R2[i]=summary(res)$adj.r.squared
testATT$coef[i]=res$coefficients[2]
}
testATT$pvalspear.adj=p.adjust(testATT$pval, method="BH")
testATT
## Enzyme pval coef R2 pvalspear.adj
## 1 SOD 0.830 9.350593e-06 -0.06218151 0.8300
## 2 Urease 0.105 1.428125e-04 0.08866117 0.2565
## 3 NiFe Hydrogenase 0.171 -1.021772e-05 -0.01944228 0.2565
KEGG_IDs=unique(AnnotFull$KEGG)[!is.na(unique(AnnotFull$KEGG))]
#Initiate matrix :
KEGG_matrix_FL=GeneMat_Nickel_FL[,1:length(KEGG_IDs)]
#Fill matrix
for (i in c(1:length(KEGG_IDs))) {
KEGG_matrix_FL[,i]=rowSums(GeneMat_Nickel_FL[,which(names(GeneMat_Nickel_FL) %in% AnnotFull[which(AnnotFull$KEGG==KEGG_IDs[i]),"Gene_ID"]),drop=F])
names(KEGG_matrix_FL)[i]=KEGG_IDs[i]
}
KEGG_matrix_FL=KEGG_matrix_FL[,-which(colSums(KEGG_matrix_FL)==0)]
#Initiate matrix :
KEGG_matrix_ATT=GeneMat_Nickel_ATT[,1:length(KEGG_IDs)]
#Fill matrix
for (i in c(1:length(KEGG_IDs))) {
KEGG_matrix_ATT[,i]=rowSums(GeneMat_Nickel_ATT[,which(names(GeneMat_Nickel_ATT) %in% AnnotFull[which(AnnotFull$KEGG==KEGG_IDs[i]),"Gene_ID"]), drop=F])
names(KEGG_matrix_ATT)[i]=KEGG_IDs[i]
}
KEGG_matrix_ATT=KEGG_matrix_ATT[,-which(colSums(KEGG_matrix_ATT)==0)]
# FL size fraction
Results_lm_FL=data.frame(KEGG_ID=c("KEGGID"),R2adj=c(0.0),Coef=c(0.0), pvallm=c(0.0), corspear=c(0.0),pvalspear=c(0.0))
# We need to match orders of row names before computing any stats :
KEGG_matrix_FL=KEGG_matrix_FL[match(meta_nickel_FL$ACE_seq_name,row.names(KEGG_matrix_FL)),]
for (i in c(1:ncol(KEGG_matrix_FL))) {
res=lm(KEGG_matrix_FL[,i]~meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE)
agc_id=names(KEGG_matrix_FL)[i]
R2=summary(res)$adj.r.squared
pvallm=lmp(res)
slope=res$coefficients[2]
corspear=cor(KEGG_matrix_FL[,i],meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE,method = "spearman")
pvalspear=perm_test(KEGG_matrix_FL[,i],meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE,method = "Spearman", alternative = "two.sided")$p.value
Results_lm_FL[i,]=c(agc_id,R2,slope,pvallm,corspear,pvalspear)
}
Results_lm_FL$KEGG_ID=as.factor(Results_lm_FL$KEGG_ID)
Results_lm_FL$R2adj=as.numeric(Results_lm_FL$R2adj)
Results_lm_FL$Coef=as.numeric(Results_lm_FL$Coef)
Results_lm_FL$pvallm=as.numeric(Results_lm_FL$pvallm)
Results_lm_FL$pvallm.adj=p.adjust(Results_lm_FL$pvallm, method="BH")
Results_lm_FL$corspear=as.numeric(Results_lm_FL$corspear)
Results_lm_FL$pvalspear=as.numeric(Results_lm_FL$pvalspear)
Results_lm_FL$pvalspear.adj=p.adjust(Results_lm_FL$pvalspear, method="BH")
summary(Results_lm_FL)
## KEGG_ID R2adj Coef pvallm
## K03187 :1 Min. :-0.03571 Min. :-6.525e-05 Min. :0.0004418
## K03188 :1 1st Qu.:-0.03502 1st Qu.:-1.002e-06 1st Qu.:0.2350029
## K03189 :1 Median :-0.00844 Median :-1.245e-07 Median :0.3915798
## K03190 :1 Mean : 0.03547 Mean :-5.374e-06 Mean :0.5079253
## K03190,K03188:1 3rd Qu.: 0.01607 3rd Qu.: 2.020e-06 3rd Qu.:0.8917667
## K03192 :1 Max. : 0.33866 Max. : 2.389e-05 Max. :0.9941537
## (Other) :9
## corspear pvalspear pvallm.adj pvalspear.adj
## Min. :-0.56021 Min. :0.0000 Min. :0.006628 Min. :0.0000
## 1st Qu.:-0.17482 1st Qu.:0.1010 1st Qu.:0.721413 1st Qu.:0.3330
## Median :-0.06106 Median :0.4840 Median :0.734212 Median :0.8561
## Mean :-0.05216 Mean :0.4163 Mean :0.721312 Mean :0.6002
## 3rd Qu.: 0.09996 3rd Qu.:0.7085 3rd Qu.:0.994154 3rd Qu.:0.8561
## Max. : 0.42897 Max. :0.9240 Max. :0.994154 Max. :0.9240
##
# Nothing worth going further
# ATT size fraction
Results_lm_ATT=data.frame(KEGG_ID=c("KEGGID"),R2adj=c(0.0),Coef=c(0.0), pvallm=c(0.0),
corspear=c(0.0),pvalspear=c(0.0))
# We need to match orders of row names before computing any stats :
KEGG_matrix_ATT=KEGG_matrix_ATT[match(meta_nickel_ATT$ACE_seq_name,row.names(KEGG_matrix_ATT)),]
for (i in c(1:ncol(KEGG_matrix_ATT))) {
res=lm(KEGG_matrix_ATT[,i]~meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE)
agc_id=names(KEGG_matrix_ATT)[i]
R2=summary(res)$adj.r.squared
pvallm=lmp(res)
slope=res$coefficients[2]
corspear=cor(KEGG_matrix_ATT[,i],meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE,method = "spearman")
pvalspear=perm_test(KEGG_matrix_ATT[,i],meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE,method = "Spearman", alternative = "two.sided")$p.value
Results_lm_ATT[i,]=c(agc_id,R2,slope,pvallm,corspear,pvalspear)
}
Results_lm_ATT$KEGG_ID=as.factor(Results_lm_ATT$KEGG_ID)
Results_lm_ATT$R2adj=as.numeric(Results_lm_ATT$R2adj)
Results_lm_ATT$Coef=as.numeric(Results_lm_ATT$Coef)
Results_lm_ATT$pvallm=as.numeric(Results_lm_ATT$pvallm)
Results_lm_ATT$pvallm.adj=p.adjust(Results_lm_ATT$pvallm, method="BH")
Results_lm_ATT$corspear=as.numeric(Results_lm_ATT$corspear)
Results_lm_ATT$pvalspear=as.numeric(Results_lm_ATT$pvalspear)
Results_lm_ATT$pvalspear.adj=p.adjust(Results_lm_ATT$pvalspear, method="BH")
summary(Results_lm_ATT)
## KEGG_ID R2adj Coef pvallm
## K03187 :1 Min. :-0.06242 Min. :-8.619e-06 Min. :0.02811
## K03188 :1 1st Qu.:-0.03605 1st Qu.:-1.208e-06 1st Qu.:0.12093
## K03188,K03190:1 Median : 0.05973 Median :-3.780e-07 Median :0.16855
## K03189 :1 Mean : 0.04839 Mean : 6.287e-06 Mean :0.31483
## K03190 :1 3rd Qu.: 0.09009 3rd Qu.: 1.436e-05 3rd Qu.:0.53176
## K03190,K03188:1 Max. : 0.22120 Max. : 3.738e-05 Max. :0.97214
## (Other) :7
## corspear pvalspear pvallm.adj pvalspear.adj
## Min. :-0.46365 Min. :0.0000 Min. :0.2916 Min. :0.0000
## 1st Qu.:-0.29943 1st Qu.:0.0910 1st Qu.:0.2916 1st Qu.:0.2232
## Median : 0.04182 Median :0.1350 Median :0.2916 Median :0.2507
## Mean : 0.05470 Mean :0.3202 Mean :0.4533 Mean :0.4285
## 3rd Qu.: 0.37242 3rd Qu.:0.3710 3rd Qu.:0.6913 3rd Qu.:0.4823
## Max. : 0.49690 Max. :0.9770 Max. :0.9721 Max. :0.9770
##
Results_lm_ATT[Results_lm_ATT$pvalspear.adj<0.05,] # This KO is absent everywhere but one sample
## KEGG_ID R2adj Coef pvallm corspear pvalspear
## 11 K03190,K03188 0.06822015 -4.074998e-07 0.153546 -0.3975649 0
## pvallm.adj pvalspear.adj
## 11 0.2915945 0
# Nothing worth going further
EggNOG_IDs=unique(AnnotFull$best_OG_desc)[!is.na(unique(AnnotFull$best_OG_desc))]
#Initiate matrix :
EggNOG_matrix_FL=GeneMat_Nickel_FL[,1:length(EggNOG_IDs)]
#Fill matrix
for (i in c(1:length(EggNOG_IDs))) {
EggNOG_matrix_FL[,i]=rowSums(GeneMat_Nickel_FL[,which(names(GeneMat_Nickel_FL) %in% AnnotFull[which(AnnotFull$best_OG_desc==EggNOG_IDs[i]),"Gene_ID"]),drop=F])
names(EggNOG_matrix_FL)[i]=EggNOG_IDs[i]
}
EggNOG_matrix_FL=EggNOG_matrix_FL[,-which(colSums(EggNOG_matrix_FL)==0)]
#
#Initiate matrix :
EggNOG_matrix_ATT=GeneMat_Nickel_ATT[,1:length(EggNOG_IDs)]
#Fill matrix
for (i in c(1:length(EggNOG_IDs))) {
EggNOG_matrix_ATT[,i]=rowSums(GeneMat_Nickel_ATT[,which(names(GeneMat_Nickel_ATT) %in% AnnotFull[which(AnnotFull$best_OG_desc==EggNOG_IDs[i]),"Gene_ID"]), drop=F])
names(EggNOG_matrix_ATT)[i]=EggNOG_IDs[i]
}
EggNOG_matrix_ATT=EggNOG_matrix_ATT[,-which(colSums(EggNOG_matrix_ATT)==0)]
# FL size fraction
Results_lm_FL=data.frame(EggNOG_ID=c("EggNOGID"),R2adj=c(0.0),Coef=c(0.0), pvallm=c(0.0), corspear=c(0.0),pvalspear=c(0.0))
# We need to match orders of row names before computing any stats :
EggNOG_matrix_FL=EggNOG_matrix_FL[match(meta_nickel_FL$ACE_seq_name,row.names(EggNOG_matrix_FL)),]
for (i in c(1:ncol(EggNOG_matrix_FL))) {
res=lm(EggNOG_matrix_FL[,i]~meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE)
agc_id=names(EggNOG_matrix_FL)[i]
R2=summary(res)$adj.r.squared
pvallm=lmp(res)
slope=res$coefficients[2]
corspear=cor(EggNOG_matrix_FL[,i],meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE,method = "spearman")
pvalspear=perm_test(EggNOG_matrix_FL[,i],meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE,method = "Spearman", alternative = "two.sided")$p.value
Results_lm_FL[i,]=c(agc_id,R2,slope,pvallm,corspear,pvalspear)
}
Results_lm_FL$EggNOG_ID=as.factor(Results_lm_FL$EggNOG_ID)
Results_lm_FL$R2adj=as.numeric(Results_lm_FL$R2adj)
Results_lm_FL$Coef=as.numeric(Results_lm_FL$Coef)
Results_lm_FL$pvallm=as.numeric(Results_lm_FL$pvallm)
Results_lm_FL$pvallm.adj=p.adjust(Results_lm_FL$pvallm, method="BH")
Results_lm_FL$corspear=as.numeric(Results_lm_FL$corspear)
Results_lm_FL$pvalspear=as.numeric(Results_lm_FL$pvalspear)
Results_lm_FL$pvalspear.adj=p.adjust(Results_lm_FL$pvalspear, method="BH")
summary(Results_lm_FL)
## EggNOG_ID
## (urease) gamma subunit : 1
## [NiFe]-hydrogenase assembly, chaperone, HybE : 1
## age-dependent response to reactive oxygen species : 1
## Along with HypE, it catalyzes the synthesis of the CN ligands of the active site iron of NiFe -hydrogenases using carbamoylphosphate as a substrate. It functions as a carbamoyl transferase using carbamoylphosphate as a substrate and transferring the carboxamido moiety in an ATP-dependent reaction to the thiolate of the C-terminal cysteine of HypE yielding a protein-S-carboxamide: 1
## Belongs to the carbamoyltransferase HypF family : 1
## Belongs to the membrane fusion protein (MFP) (TC 8.A.1) family / 4 iron, 4 sulfur cluster binding / NADH dehydrogenase : 1
## (Other) :72
## R2adj Coef pvallm corspear
## Min. :-0.03569 Min. :-9.082e-05 Min. :0.001083 Min. :-0.65411
## 1st Qu.:-0.03144 1st Qu.:-3.558e-06 1st Qu.:0.109336 1st Qu.:-0.29824
## Median :-0.00610 Median :-5.484e-08 Median :0.372553 Median :-0.10745
## Mean : 0.03015 Mean :-3.534e-06 Mean :0.417583 Mean :-0.09439
## 3rd Qu.: 0.05647 3rd Qu.: 2.546e-07 3rd Qu.:0.735957 3rd Qu.: 0.09142
## Max. : 0.29740 Max. : 7.691e-05 Max. :0.979386 Max. : 0.59276
##
## pvalspear pvallm.adj pvalspear.adj
## Min. :0.0000 Min. :0.04261 Min. :0.0000
## 1st Qu.:0.0325 1st Qu.:0.41184 1st Qu.:0.1241
## Median :0.2940 Median :0.71348 Median :0.5805
## Mean :0.3643 Mean :0.64521 Mean :0.5282
## 3rd Qu.:0.6265 3rd Qu.:0.93972 3rd Qu.:0.8289
## Max. :0.9950 Max. :0.97939 Max. :0.9950
##
# Some spearman might be worth investigating
Results_lm_FL[which(Results_lm_FL$pvalspear<0.05),] %>% arrange(desc(corspear)) #14 significant according to spearman pre-correction, highest coeff = SOD
## EggNOG_ID
## 1 COG2370 Hydrogenase urease accessory protein
## 2 UreD urease accessory protein
## 3 superoxide dismutase copper chaperone activity
## 4 g t u mismatch-specific dna glycosylase
## 5 Superoxide dismutase
## 6 NDH-1 shuttles electrons from NADH, via FMN and iron- sulfur (Fe-S) centers, to quinones in the respiratory chain. The immediate electron acceptor for the enzyme in this species is believed to be ubiquinone. Couples the redox reaction to proton translocation (for every two electrons transferred, four hydrogen ions are translocated across the cytoplasmic membrane), and thus conserves the redox energy in a proton gradient / NDH-1 shuttles electrons from NADH, via FMN and iron- sulfur (Fe-S) centers, to quinones in the respiratory chain. The immediate electron acceptor for the enzyme in this species is believed to be ubiquinone. Couples the redox reaction to proton translocation (for every two electrons transferred, four hydrogen ions are translocated across the cytoplasmic membrane), and thus conserves the redox energy in a proton gradient
## 7 Respiratory-chain NADH dehydrogenase, 49 Kd subunit
## 8 COG0378 Ni2 -binding GTPase involved in regulation of expression and maturation of urease and hydrogenase
## 9 NDH-1 shuttles electrons from NADH, via FMN and iron- sulfur (Fe-S) centers, to quinones in the respiratory chain. The immediate electron acceptor for the enzyme in this species is believed to be ubiquinone. Couples the redox reaction to proton translocation (for every two electrons transferred, four hydrogen ions are translocated across the cytoplasmic membrane), and thus conserves the redox energy in a proton gradient
## 10 COG2032 Cu Zn superoxide dismutase
## 11 NAD binding
## 12 CobW/HypB/UreG, nucleotide-binding domain
## 13 Urease accessory protein
## 14 (urease) gamma subunit
## 15 radicals which are normally produced within the cells and which are toxic to biological systems
## 16 DNA glycosylase
## 17 NDH-1 shuttles electrons from NADH, via FMN and iron- sulfur (Fe-S) centers, to quinones in the respiratory chain. The immediate electron acceptor for the enzyme in this species is believed to be a menaquinone. Couples the redox reaction to proton translocation (for every two electrons transferred, four hydrogen ions are translocated across the cytoplasmic membrane), and thus conserves the redox energy in a proton gradient
## 18 COG0852 NADH ubiquinone oxidoreductase 27 kD subunit
## 19 regulation of superoxide dismutase activity
## 20 UreF
## 21 nickel cation binding
## 22 UreE urease accessory protein, N-terminal domain
## 23 enzyme active site formation
## R2adj Coef pvallm corspear pvalspear pvallm.adj
## 1 0.195043288 1.264936e-05 0.008450685 0.5927585 0.004 0.08106755
## 2 0.297398683 6.890656e-06 0.001083242 0.4397153 0.023 0.04260634
## 3 0.051670801 1.716544e-06 0.119435451 0.4364991 0.027 0.41184110
## 4 0.085844436 9.301425e-08 0.063848818 0.4180726 0.044 0.31126299
## 5 0.024419948 3.403543e-05 0.199597636 0.4158223 0.035 0.57661539
## 6 -0.013174211 -1.704886e-07 0.436598463 -0.3614642 0.016 0.72482144
## 7 0.022349642 -1.820982e-07 0.207755361 -0.3630656 0.017 0.57874708
## 8 0.001098142 -1.875359e-07 0.318415030 -0.3705649 0.029 0.70213612
## 9 0.006558944 -1.134318e-05 0.284338061 -0.4133322 0.034 0.70213612
## 10 0.072213511 -5.418907e-07 0.081877155 -0.4167865 0.013 0.35480101
## 11 0.207671144 -2.877692e-05 0.006629777 -0.4169510 0.010 0.08106755
## 12 0.202886917 -1.019447e-05 0.007270409 -0.4217123 0.032 0.08106755
## 13 -0.009172066 -8.230703e-06 0.398094262 -0.4443209 0.002 0.72482144
## 14 0.087794733 -9.082360e-05 0.061619842 -0.4469158 0.007 0.31126299
## 15 0.116787439 -1.951891e-05 0.036320384 -0.4577165 0.008 0.25754454
## 16 0.060399273 -1.853228e-05 0.101672825 -0.4913712 0.017 0.41184110
## 17 0.057222607 -3.991676e-06 0.107795538 -0.4938219 0.011 0.41184110
## 18 0.243158446 -5.924054e-06 0.003302969 -0.4978225 0.006 0.06440790
## 19 0.291122059 -5.857010e-07 0.001236706 -0.5209488 0.011 0.04260634
## 20 0.098427002 -8.033860e-05 0.050770311 -0.5241337 0.004 0.30462187
## 21 0.200256225 -4.804867e-05 0.007647473 -0.5381623 0.000 0.08106755
## 22 0.089787774 -8.706360e-05 0.059422730 -0.5509256 0.001 0.31126299
## 23 0.277635619 -2.777215e-05 0.001638705 -0.6541127 0.003 0.04260634
## pvalspear.adj
## 1 0.05200000
## 2 0.10552941
## 3 0.11700000
## 4 0.14921739
## 5 0.12409091
## 6 0.08287500
## 7 0.08287500
## 8 0.11905263
## 9 0.12409091
## 10 0.07800000
## 11 0.07150000
## 12 0.12409091
## 13 0.05200000
## 14 0.06825000
## 15 0.06933333
## 16 0.08287500
## 17 0.07150000
## 18 0.06685714
## 19 0.07150000
## 20 0.05200000
## 21 0.00000000
## 22 0.03900000
## 23 0.05200000
Results_lm_FL[which(Results_lm_FL$pvalspear<0.05 & Results_lm_FL$corspear>0),] %>% arrange(desc(corspear)) # Only 5 have coeff >0 and one passes the adjustment
## EggNOG_ID R2adj Coef
## 1 COG2370 Hydrogenase urease accessory protein 0.19504329 1.264936e-05
## 2 UreD urease accessory protein 0.29739868 6.890656e-06
## 3 superoxide dismutase copper chaperone activity 0.05167080 1.716544e-06
## 4 g t u mismatch-specific dna glycosylase 0.08584444 9.301425e-08
## 5 Superoxide dismutase 0.02441995 3.403543e-05
## pvallm corspear pvalspear pvallm.adj pvalspear.adj
## 1 0.008450685 0.5927585 0.004 0.08106755 0.0520000
## 2 0.001083242 0.4397153 0.023 0.04260634 0.1055294
## 3 0.119435451 0.4364991 0.027 0.41184110 0.1170000
## 4 0.063848818 0.4180726 0.044 0.31126299 0.1492174
## 5 0.199597636 0.4158223 0.035 0.57661539 0.1240909
ggplot() +
geom_smooth(aes(y=EggNOG_matrix_FL[,"COG2370 Hydrogenase urease accessory protein"],x=meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
geom_point(aes(y=EggNOG_matrix_FL[,"COG2370 Hydrogenase urease accessory protein"],x=meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_FL$MertzGlacier), size=3) +
theme_bw() +
labs(y="COG2370 Hydrogenase urease accessory protein", x=expression(delta^60 * Ni ~ "(‰)")) +
theme(text = element_text(size = 16))
ggplot() +
geom_smooth(aes(y=EggNOG_matrix_FL[,"Superoxide dismutase"],x=meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
geom_point(aes(y=EggNOG_matrix_FL[,"Superoxide dismutase"],x=meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_FL$MertzGlacier), size=3) +
theme_bw() +
labs(y="Superoxide dismutase", x=expression(delta^60 * Ni ~ "(‰)")) +
theme(text = element_text(size = 16))
# Again, mertz seems to disturb the relationship. Let's try without it :
Results_lm_FL=data.frame(EggNOG_ID=c("EggNOGID"),R2adj=c(0.0),Coef=c(0.0), pvallm=c(0.0), corspear=c(0.0),pvalspear=c(0.0))
# We need to match orders of row names before computing any stats :
meta_nickel_FL_nomertz<-meta_nickel_FL[-which(meta_nickel_FL$MertzGlacier==TRUE),]
EggNOG_matrix_FL_nomertz=EggNOG_matrix_FL[match(meta_nickel_FL_nomertz$ACE_seq_name,row.names(EggNOG_matrix_FL)),]
for (i in c(1:ncol(EggNOG_matrix_FL_nomertz))) {
res=lm(EggNOG_matrix_FL_nomertz[,i]~meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE)
agc_id=names(EggNOG_matrix_FL_nomertz)[i]
R2=summary(res)$adj.r.squared
pvallm=lmp(res)
slope=res$coefficients[2]
corspear=cor(EggNOG_matrix_FL_nomertz[,i],meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE,method = "spearman")
pvalspear=perm_test(EggNOG_matrix_FL_nomertz[,i],meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE,method = "Spearman", alternative = "two.sided")$p.value
Results_lm_FL[i,]=c(agc_id,R2,slope,pvallm,corspear,pvalspear)
}
Results_lm_FL$EggNOG_ID=as.factor(Results_lm_FL$EggNOG_ID)
Results_lm_FL$R2adj=as.numeric(Results_lm_FL$R2adj)
Results_lm_FL$Coef=as.numeric(Results_lm_FL$Coef)
Results_lm_FL$pvallm=as.numeric(Results_lm_FL$pvallm)
Results_lm_FL$pvallm.adj=p.adjust(Results_lm_FL$pvallm, method="BH")
Results_lm_FL$corspear=as.numeric(Results_lm_FL$corspear)
Results_lm_FL$pvalspear=as.numeric(Results_lm_FL$pvalspear)
Results_lm_FL$pvalspear.adj=p.adjust(Results_lm_FL$pvalspear, method="BH")
summary(Results_lm_FL)
## EggNOG_ID
## (urease) gamma subunit : 1
## [NiFe]-hydrogenase assembly, chaperone, HybE : 1
## age-dependent response to reactive oxygen species : 1
## Along with HypE, it catalyzes the synthesis of the CN ligands of the active site iron of NiFe -hydrogenases using carbamoylphosphate as a substrate. It functions as a carbamoyl transferase using carbamoylphosphate as a substrate and transferring the carboxamido moiety in an ATP-dependent reaction to the thiolate of the C-terminal cysteine of HypE yielding a protein-S-carboxamide: 1
## Belongs to the carbamoyltransferase HypF family : 1
## Belongs to the membrane fusion protein (MFP) (TC 8.A.1) family / 4 iron, 4 sulfur cluster binding / NADH dehydrogenase : 1
## (Other) :72
## R2adj Coef pvallm
## Min. :-0.039996 Min. :-1.227e-04 Min. :0.0004554
## 1st Qu.:-0.036735 1st Qu.:-2.952e-06 1st Qu.:0.0905450
## Median :-0.008351 Median :-3.592e-08 Median :0.3841547
## Mean : 0.036302 Mean :-7.624e-06 Mean :0.4331391
## 3rd Qu.: 0.074792 3rd Qu.: 2.450e-07 3rd Qu.:0.7816091
## Max. : 0.369902 Max. : 7.468e-05 Max. :0.9925182
##
## corspear pvalspear pvallm.adj pvalspear.adj
## Min. :-0.62271 Min. :0.00000 Min. :0.03383 Min. :0.0000
## 1st Qu.:-0.32283 1st Qu.:0.02725 1st Qu.:0.34856 1st Qu.:0.1040
## Median :-0.10804 Median :0.13650 Median :0.74911 Median :0.2691
## Mean :-0.05991 Mean :0.29788 Mean :0.64544 Mean :0.4106
## 3rd Qu.: 0.11287 3rd Qu.:0.58175 3rd Qu.:0.97662 3rd Qu.:0.7536
## Max. : 0.74989 Max. :0.99000 Max. :0.99252 Max. :0.9900
##
# Some spearman might be worth investigating
Results_lm_FL[which(Results_lm_FL$pvalspear<0.05),] %>% arrange(desc(corspear)) #17 significant according to spearman pre-correction, highest coeff = SOD
## EggNOG_ID
## 1 uracil-DNA glycosylase
## 2 Superoxide dismutase
## 3 COG2370 Hydrogenase urease accessory protein
## 4 superoxide dismutase copper chaperone activity
## 5 g t u mismatch-specific dna glycosylase
## 6 Copper chaperone for superoxide dismutase
## 7 urease accessory protein
## 8 urease activity
## 9 Superoxide dismutase Cu-Zn
## 10 Respiratory-chain NADH dehydrogenase, 49 Kd subunit
## 11 NAD binding
## 12 Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 13 (urease) gamma subunit
## 14 COG2032 Cu Zn superoxide dismutase
## 15 Urease accessory protein
## 16 PFAM NADH ubiquinone oxidoreductase-like, 20kDa subunit
## 17 COG0852 NADH ubiquinone oxidoreductase 27 kD subunit
## 18 UreF
## 19 nickel cation binding
## 20 COG0378 Ni2 -binding GTPase involved in regulation of expression and maturation of urease and hydrogenase
## 21 NDH-1 shuttles electrons from NADH, via FMN and iron- sulfur (Fe-S) centers, to quinones in the respiratory chain. The immediate electron acceptor for the enzyme in this species is believed to be a menaquinone. Couples the redox reaction to proton translocation (for every two electrons transferred, four hydrogen ions are translocated across the cytoplasmic membrane), and thus conserves the redox energy in a proton gradient
## 22 belongs to the urease gamma subunit family
## 23 UreE urease accessory protein, N-terminal domain
## 24 regulation of superoxide dismutase activity
## 25 DNA glycosylase
## 26 enzyme active site formation
## R2adj Coef pvallm corspear pvalspear pvallm.adj
## 1 0.338368495 4.044571e-05 0.0008673153 0.7498872 0.000 0.03382530
## 2 0.196218620 7.468500e-05 0.0119621763 0.6565720 0.000 0.14938730
## 3 0.369901707 1.953177e-05 0.0004554090 0.6541244 0.006 0.03382530
## 4 0.157527653 2.990610e-06 0.0230666265 0.6034780 0.005 0.14993307
## 5 0.185979737 1.483644e-07 0.0142571652 0.5363009 0.009 0.14938730
## 6 0.076489185 3.602725e-06 0.0879427789 0.5068467 0.011 0.34297684
## 7 0.113849882 1.292431e-05 0.0475992782 0.4130346 0.042 0.24355801
## 8 0.098908907 6.587840e-06 0.0608471438 0.4105870 0.041 0.24979354
## 9 0.187894257 -8.306746e-08 0.0137982937 -0.3154604 0.034 0.14938730
## 10 -0.008115644 -1.564331e-07 0.3823676781 -0.3220645 0.047 0.74910770
## 11 0.148026538 -2.866255e-05 0.0270365411 -0.3332836 0.018 0.15986740
## 12 0.050253761 -9.979725e-05 0.1357975628 -0.3423598 0.028 0.39526798
## 13 0.058566202 -9.321988e-05 0.1182435594 -0.3596748 0.015 0.38592041
## 14 0.045538208 -5.534945e-07 0.1469586080 -0.3977876 0.025 0.39526798
## 15 -0.026840801 -6.776739e-06 0.5764249665 -0.3981037 0.011 0.93669057
## 16 -0.038289592 -2.414750e-06 0.8408271245 -0.4069322 0.046 0.97661713
## 17 0.164486367 -5.692661e-06 0.0205226755 -0.4135931 0.023 0.14993307
## 18 0.069699954 -8.357698e-05 0.0983516351 -0.4421000 0.027 0.36530607
## 19 0.144454135 -4.852894e-05 0.0286941480 -0.4445476 0.001 0.15986740
## 20 0.001934573 -2.346633e-07 0.3152311006 -0.4504207 0.009 0.70251502
## 21 0.048302938 -4.565129e-06 0.1403018080 -0.4588323 0.021 0.39526798
## 22 0.178290080 -6.370265e-07 0.0162517526 -0.4619605 0.023 0.14938730
## 23 0.061575382 -9.004740e-05 0.1124880800 -0.4619868 0.004 0.38592041
## 24 0.295146639 -6.860529e-07 0.0020121365 -0.4748136 0.024 0.05231555
## 25 0.058311413 -2.194015e-05 0.1187447403 -0.4995596 0.013 0.38592041
## 26 0.239868352 -3.010220e-05 0.0055649296 -0.6227064 0.002 0.10851613
## pvalspear.adj
## 1 0.00000000
## 2 0.00000000
## 3 0.06685714
## 4 0.06500000
## 5 0.07800000
## 6 0.07800000
## 7 0.13650000
## 8 0.13650000
## 9 0.12054545
## 10 0.14100000
## 11 0.10028571
## 12 0.10400000
## 13 0.09000000
## 14 0.10263158
## 15 0.07800000
## 16 0.14100000
## 17 0.10263158
## 18 0.10400000
## 19 0.02600000
## 20 0.07800000
## 21 0.10263158
## 22 0.10263158
## 23 0.06240000
## 24 0.10263158
## 25 0.08450000
## 26 0.03900000
Results_lm_FL[which(Results_lm_FL$pvalspear<0.05 & Results_lm_FL$corspear>0),] %>% arrange(desc(corspear)) # 6 have coeff >0 and 3 passes the adjustment
## EggNOG_ID R2adj Coef
## 1 uracil-DNA glycosylase 0.33836849 4.044571e-05
## 2 Superoxide dismutase 0.19621862 7.468500e-05
## 3 COG2370 Hydrogenase urease accessory protein 0.36990171 1.953177e-05
## 4 superoxide dismutase copper chaperone activity 0.15752765 2.990610e-06
## 5 g t u mismatch-specific dna glycosylase 0.18597974 1.483644e-07
## 6 Copper chaperone for superoxide dismutase 0.07648919 3.602725e-06
## 7 urease accessory protein 0.11384988 1.292431e-05
## 8 urease activity 0.09890891 6.587840e-06
## pvallm corspear pvalspear pvallm.adj pvalspear.adj
## 1 0.0008673153 0.7498872 0.000 0.0338253 0.00000000
## 2 0.0119621763 0.6565720 0.000 0.1493873 0.00000000
## 3 0.0004554090 0.6541244 0.006 0.0338253 0.06685714
## 4 0.0230666265 0.6034780 0.005 0.1499331 0.06500000
## 5 0.0142571652 0.5363009 0.009 0.1493873 0.07800000
## 6 0.0879427789 0.5068467 0.011 0.3429768 0.07800000
## 7 0.0475992782 0.4130346 0.042 0.2435580 0.13650000
## 8 0.0608471438 0.4105870 0.041 0.2497935 0.13650000
#Superoxide dismutase
#uracil-DNA glycosylase (narrow og desc = UreE urease accessory protein)
#COG2370 Hydrogenase urease accessory protein
EggNOG_matrix_FL_nomertz[,"uracil-DNA glycosylase"]
## [1] 3.520051e-06 2.258165e-05 3.537414e-06 2.414043e-05 5.562218e-06
## [6] 2.442940e-05 3.706262e-06 1.572073e-05 9.762735e-07 3.645000e-07
## [11] 3.371216e-06 5.168001e-06 5.860243e-06 5.161933e-06 1.323165e-07
## [16] 1.717060e-07 2.619178e-06 2.616938e-06 6.378028e-06 6.264758e-06
## [21] 6.266489e-06 5.205923e-07 3.767419e-06 3.307610e-06 2.041500e-05
## [26] 1.762316e-05 1.778057e-05
EggNOG_matrix_FL_nomertz[,"COG2370 Hydrogenase urease accessory protein"]
## [1] 2.600635e-06 9.551650e-06 1.267744e-06 9.021162e-06 1.331799e-06
## [6] 1.167964e-05 1.174261e-06 2.273760e-06 1.722836e-07 0.000000e+00
## [11] 5.805984e-06 4.772365e-06 8.080032e-06 5.807175e-06 1.323165e-07
## [16] 1.287795e-07 4.622078e-07 1.327433e-06 7.395534e-06 8.015778e-06
## [21] 7.465934e-06 6.941231e-08 1.753155e-06 1.699323e-06 8.540969e-06
## [26] 7.907082e-06 6.828448e-06
# 2 have adj p-values<0.01
ggplot() +
geom_smooth(aes(y=EggNOG_matrix_FL_nomertz[,"Superoxide dismutase"]*100,x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
geom_point(aes(y=EggNOG_matrix_FL_nomertz[,"Superoxide dismutase"]*100,x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE), size=3) +
theme_bw() +
labs(y="Superoxide dismutase (EggNOG best OG, % of coverage)", x=expression(delta^60 * Ni ~ "(‰)")) +
theme(text = element_text(size = 16))
ggplot() +
geom_smooth(aes(y=EggNOG_matrix_FL_nomertz[,"COG2370 Hydrogenase urease accessory protein"],x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
geom_point(aes(y=EggNOG_matrix_FL_nomertz[,"COG2370 Hydrogenase urease accessory protein"],x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE), size=3) +
theme_bw() +
labs(y="COG2370 Hydrogenase urease accessory protein (EggNOG best OG description)", x=expression(delta^60 * Ni ~ "(‰)")) +
theme(text = element_text(size = 16))
ggplot() +
geom_smooth(aes(y=EggNOG_matrix_FL_nomertz[,"uracil-DNA glycosylase"],x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
geom_point(aes(y=EggNOG_matrix_FL_nomertz[,"uracil-DNA glycosylase"],x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_FL_nomertz$MertzGlacier), size=3) +
theme_bw() +
labs(y="uracil-DNA glycosylase (EggNOG best OG description)", x=expression(delta^60 * Ni ~ "(‰)")) +
theme(text = element_text(size = 16))
# ATT size fraction
Results_lm_ATT=data.frame(EggNOG_ID=c("EggNOGID"),R2adj=c(0.0),Coef=c(0.0), pvallm=c(0.0), corspear=c(0.0),pvalspear=c(0.0))
# We need to match orders of row names before computing any stats :
EggNOG_matrix_ATT=EggNOG_matrix_ATT[match(meta_nickel_ATT$ACE_seq_name,row.names(EggNOG_matrix_ATT)),]
for (i in c(1:ncol(EggNOG_matrix_ATT))) {
res=lm(EggNOG_matrix_ATT[,i]~meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE)
agc_id=names(EggNOG_matrix_ATT)[i]
R2=summary(res)$adj.r.squared
pvallm=lmp(res)
slope=res$coefficients[2]
corspear=cor(EggNOG_matrix_ATT[,i],meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE,method = "spearman")
pvalspear=perm_test(EggNOG_matrix_ATT[,i],meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE,method = "Spearman", alternative = "two.sided")$p.value
Results_lm_ATT[i,]=c(agc_id,R2,slope,pvallm,corspear,pvalspear)
}
Results_lm_ATT$EggNOG_ID=as.factor(Results_lm_ATT$EggNOG_ID)
Results_lm_ATT$R2adj=as.numeric(Results_lm_ATT$R2adj)
Results_lm_ATT$Coef=as.numeric(Results_lm_ATT$Coef)
Results_lm_ATT$pvallm=as.numeric(Results_lm_ATT$pvallm)
Results_lm_ATT$pvallm.adj=p.adjust(Results_lm_ATT$pvallm, method="BH")
Results_lm_ATT$corspear=as.numeric(Results_lm_ATT$corspear)
Results_lm_ATT$pvalspear=as.numeric(Results_lm_ATT$pvalspear)
Results_lm_ATT$pvalspear.adj=p.adjust(Results_lm_ATT$pvalspear, method="BH")
summary(Results_lm_ATT)
## EggNOG_ID
## (urease) gamma subunit : 1
## [NiFe]-hydrogenase assembly, chaperone, HybE : 1
## age-dependent response to reactive oxygen species : 1
## Along with HypE, it catalyzes the synthesis of the CN ligands of the active site iron of NiFe -hydrogenases using carbamoylphosphate as a substrate. It functions as a carbamoyl transferase using carbamoylphosphate as a substrate and transferring the carboxamido moiety in an ATP-dependent reaction to the thiolate of the C-terminal cysteine of HypE yielding a protein-S-carboxamide: 1
## Belongs to the NiFe NiFeSe hydrogenase large subunit family : 1
## Belongs to the urease : 1
## (Other) :55
## R2adj Coef pvallm
## Min. :-0.062493 Min. :-1.170e-05 Min. :0.0005066
## 1st Qu.:-0.038402 1st Qu.:-1.024e-06 1st Qu.:0.1771164
## Median :-0.007245 Median :-8.870e-08 Median :0.3627533
## Mean : 0.019979 Mean : 2.372e-06 Mean :0.4090096
## 3rd Qu.: 0.055220 3rd Qu.: 9.330e-07 3rd Qu.:0.5508451
## Max. : 0.511998 Max. : 3.193e-05 Max. :0.9920935
##
## corspear pvalspear pvallm.adj pvalspear.adj
## Min. :-0.54490 Min. :0.0000 Min. :0.0309 Min. :0.0000
## 1st Qu.:-0.29720 1st Qu.:0.0990 1st Qu.:0.6582 1st Qu.:0.3538
## Median :-0.11693 Median :0.3300 Median :0.6888 Median :0.6222
## Mean :-0.07477 Mean :0.3797 Mean :0.7156 Mean :0.5898
## 3rd Qu.: 0.08273 3rd Qu.:0.6460 3rd Qu.:0.7203 3rd Qu.:0.8567
## Max. : 0.71718 Max. :0.9870 Max. :0.9921 Max. :0.9870
##
# Doesn't look as promising
Results_lm_ATT[which(Results_lm_ATT$pvalspear<0.05),] %>% arrange(desc(corspear))
## EggNOG_ID
## 1 UreD urease accessory protein
## 2 Facilitates the functional incorporation of the urease nickel metallocenter. This process requires GTP hydrolysis, probably effectuated by UreG
## 3 COG1573 Uracil-DNA glycosylase
## 4 NDH-1 shuttles electrons from NADH, via FMN and iron- sulfur (Fe-S) centers, to quinones in the respiratory chain. The immediate electron acceptor for the enzyme in this species is believed to be ubiquinone. Couples the redox reaction to proton translocation (for every two electrons transferred, four hydrogen ions are translocated across the cytoplasmic membrane), and thus conserves the redox energy in a proton gradient
## 5 UreF
## 6 radicals which are normally produced within the cells and which are toxic to biological systems
## 7 UreE urease accessory protein, N-terminal domain
## 8 NAD binding
## R2adj Coef pvallm corspear pvalspear pvallm.adj
## 1 0.511998250 2.432805e-05 0.0005066074 0.7171848 0.007 0.03090305
## 2 0.085358581 1.678385e-05 0.1273282354 0.5327827 0.026 0.65817014
## 3 0.199355266 1.001660e-07 0.0361180764 0.3975649 0.000 0.55080066
## 4 0.009245237 -6.613806e-07 0.2977119684 -0.4054589 0.035 0.68876661
## 5 0.010834669 -1.566047e-06 0.2922290515 -0.4394834 0.015 0.68876661
## 6 0.047046889 -1.169910e-05 0.1938664332 -0.4423796 0.039 0.65817014
## 7 -0.021142608 -1.189283e-06 0.4326226215 -0.4848494 0.008 0.68876661
## 8 0.019298921 -2.570436e-06 0.2649608541 -0.5449049 0.005 0.68876661
## pvalspear.adj
## 1 0.1220000
## 2 0.2643333
## 3 0.0000000
## 4 0.2973750
## 5 0.1830000
## 6 0.2973750
## 7 0.1220000
## 8 0.1220000
Results_lm_ATT[which(Results_lm_ATT$pvalspear<0.05 & Results_lm_ATT$corspear>0),] %>% arrange(desc(corspear)) # 6 have coeff >0 and 3 passes the adjustment
## EggNOG_ID
## 1 UreD urease accessory protein
## 2 Facilitates the functional incorporation of the urease nickel metallocenter. This process requires GTP hydrolysis, probably effectuated by UreG
## 3 COG1573 Uracil-DNA glycosylase
## R2adj Coef pvallm corspear pvalspear pvallm.adj
## 1 0.51199825 2.432805e-05 0.0005066074 0.7171848 0.007 0.03090305
## 2 0.08535858 1.678385e-05 0.1273282354 0.5327827 0.026 0.65817014
## 3 0.19935527 1.001660e-07 0.0361180764 0.3975649 0.000 0.55080066
## pvalspear.adj
## 1 0.1220000
## 2 0.2643333
## 3 0.0000000
ggplot() +
geom_smooth(aes(y=EggNOG_matrix_ATT[,"UreD urease accessory protein"],x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
geom_point(aes(y=EggNOG_matrix_ATT[,"UreD urease accessory protein"],x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_ATT$MertzGlacier), size=3) +
theme_bw() +
labs(y="UreD urease accessory protein (EggNOG annotation)", x=expression(delta^60 * Ni ~ "(‰)")) +
theme(text = element_text(size = 16))
ggplot() +
geom_smooth(aes(y=EggNOG_matrix_ATT[,"Facilitates the functional incorporation of the urease nickel metallocenter. This process requires GTP hydrolysis, probably effectuated by UreG"],x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
geom_point(aes(y=EggNOG_matrix_ATT[,"Facilitates the functional incorporation of the urease nickel metallocenter. This process requires GTP hydrolysis, probably effectuated by UreG"],x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_ATT$MertzGlacier), size=3) +
theme_bw() +
labs(y="UreD urease accessory protein (EggNOG annotation)", x=expression(delta^60 * Ni ~ "(‰)")) +
theme(text = element_text(size = 16))
ggplot() +
geom_smooth(aes(y=EggNOG_matrix_ATT[,"COG1573 Uracil-DNA glycosylase"],x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
geom_point(aes(y=EggNOG_matrix_ATT[,"COG1573 Uracil-DNA glycosylase"],x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_ATT$MertzGlacier), size=3) +
theme_bw() +
labs(y="COG1573 Uracil-DNA glycosylase (EggNOG annotation)", x=expression(delta^60 * Ni ~ "(‰)")) +
theme(text = element_text(size = 16))
# --> Basically present in one spot only, irrelevant
# --> Clean figures with the most interesting ones
g1 <- ggplot() +
geom_smooth(aes(y=EggNOG_matrix_FL_nomertz[,"Superoxide dismutase"]*100,x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2, col="grey20") +
geom_point(aes(y=EggNOG_matrix_FL[,"Superoxide dismutase"]*100,x=meta_nickel_FL[, "Ni_60_58_D_DELTA_BOTTLE"], shape=meta_nickel_FL[, "MertzGlacier"], col=meta_nickel_FL[, "MertzGlacier"]), size=3) +
scale_color_manual(values=c("black","skyblue")) +
theme_bw() +
labs(y="Superoxide dismutase\n(EggNOG best OG description, % of coverage)", x=expression(delta^60 * Ni ~ "(‰)"), shape="Mertz Glacier", col="Mertz Glacier") +
theme(text = element_text(size = 16))
g1
g2 <- ggplot() +
geom_smooth(aes(y=EggNOG_matrix_FL_nomertz[,"COG2370 Hydrogenase urease accessory protein"]*100,x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2, col="grey20") +
geom_point(aes(y=EggNOG_matrix_FL[,"COG2370 Hydrogenase urease accessory protein"]*100,x=meta_nickel_FL[, "Ni_60_58_D_DELTA_BOTTLE"], shape=meta_nickel_FL[, "MertzGlacier"], col=meta_nickel_FL[, "MertzGlacier"]), size=3) +
scale_color_manual(values=c("black","skyblue")) +
theme_bw() +
labs(y="COG2370 Hydrogenase urease accessory protein\n(EggNOG best OG description, % of coverage)", x=expression(delta^60 * Ni ~ "(‰)"), shape= "Mertz Glacier", col="Mertz Glacier") +
theme(text = element_text(size = 16))
g2
g3 <- ggplot() +
geom_smooth(aes(y=EggNOG_matrix_ATT[,"UreD urease accessory protein"]*100,x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2, col="grey20") +
geom_point(aes(y=EggNOG_matrix_ATT[,"UreD urease accessory protein"]*100,x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_ATT$MertzGlacier), size=3) +
theme_bw() +
labs(y="UreD urease accessory protein\n(EggNOG best OG description, % of coverage)", x=expression(delta^60 * Ni ~ "(‰)"), shape= "Mertz Glacier") +
theme(text = element_text(size = 16))
g3
g4 <- ggplot() +
geom_smooth(aes(y=EggNOG_matrix_ATT[,"Facilitates the functional incorporation of the urease nickel metallocenter. This process requires GTP hydrolysis, probably effectuated by UreG"]*100,x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2, col="grey20") +
geom_point(aes(y=EggNOG_matrix_ATT[,"Facilitates the functional incorporation of the urease nickel metallocenter. This process requires GTP hydrolysis, probably effectuated by UreG"]*100,x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_ATT$MertzGlacier), size=3) +
theme_bw() +
labs(y="Facilitates the functional incorporation\nof the urease nickel metallocenter\n(EggNOG best OG description, % of coverage)", x=expression(delta^60 * Ni ~ "(‰)"), shape= "Mertz Glacier") +
theme(text = element_text(size = 16))
g4
PFAM_IDs=unique(AnnotFull$PFAMs)[!is.na(unique(AnnotFull$PFAMs))]
#Initiate matrix :
PFAM_matrix_FL=GeneMat_Nickel_FL[,1:length(PFAM_IDs)]
#Fill matrix
for (i in c(1:length(PFAM_IDs))) {
PFAM_matrix_FL[,i]=rowSums(GeneMat_Nickel_FL[,which(names(GeneMat_Nickel_FL) %in% AnnotFull[which(AnnotFull$PFAMs==PFAM_IDs[i]),"Gene_ID"]),drop=F])
names(PFAM_matrix_FL)[i]=PFAM_IDs[i]
}
PFAM_matrix_FL=PFAM_matrix_FL[,-which(colSums(PFAM_matrix_FL)==0)]
#
#Initiate matrix :
PFAM_matrix_ATT=GeneMat_Nickel_ATT[,1:length(PFAM_IDs)]
#Fill matrix
for (i in c(1:length(PFAM_IDs))) {
PFAM_matrix_ATT[,i]=rowSums(GeneMat_Nickel_ATT[,which(names(GeneMat_Nickel_ATT) %in% AnnotFull[which(AnnotFull$PFAMs==PFAM_IDs[i]),"Gene_ID"]), drop=F])
names(PFAM_matrix_ATT)[i]=PFAM_IDs[i]
}
PFAM_matrix_ATT=PFAM_matrix_ATT[,-which(colSums(PFAM_matrix_ATT)==0)]
# FL size fraction
Results_lm_FL=data.frame(PFAM_ID=c("PFAM_ID"),R2adj=c(0.0),Coef=c(0.0), pvallm=c(0.0), corspear=c(0.0),pvalspear=c(0.0))
# We need to match orders of row names before computing any stats :
PFAM_matrix_FL=PFAM_matrix_FL[match(meta_nickel_FL$ACE_seq_name,row.names(PFAM_matrix_FL)),]
for (i in c(1:ncol(PFAM_matrix_FL))) {
res=lm(PFAM_matrix_FL[,i]~meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE)
agc_id=names(PFAM_matrix_FL)[i]
R2=summary(res)$adj.r.squared
pvallm=lmp(res)
slope=res$coefficients[2]
corspear=cor(PFAM_matrix_FL[,i],meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE,method = "spearman")
pvalspear=perm_test(PFAM_matrix_FL[,i],meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE,method = "Spearman", alternative = "two.sided")$p.value
Results_lm_FL[i,]=c(agc_id,R2,slope,pvallm,corspear,pvalspear)
}
Results_lm_FL$PFAM_ID=as.factor(Results_lm_FL$PFAM_ID)
Results_lm_FL$R2adj=as.numeric(Results_lm_FL$R2adj)
Results_lm_FL$Coef=as.numeric(Results_lm_FL$Coef)
Results_lm_FL$pvallm=as.numeric(Results_lm_FL$pvallm)
Results_lm_FL$pvallm.adj=p.adjust(Results_lm_FL$pvallm, method="BH")
Results_lm_FL$corspear=as.numeric(Results_lm_FL$corspear)
Results_lm_FL$pvalspear=as.numeric(Results_lm_FL$pvalspear)
Results_lm_FL$pvalspear.adj=p.adjust(Results_lm_FL$pvalspear, method="BH")
summary(Results_lm_FL)
## PFAM_ID
## AAA_11,AAA_12,ABC_membrane,ABC_tran,ACC_central,Biotin_carb_C,Biotin_carb_N,Biotin_lipoyl,Biotin_lipoyl_2,CPSase_L_D2,Carboxyl_trans,Complex1_49kDa,DUF3347,DUF4347,DUF559,HlyD_3,HlyD_D23,NiFeSe_Hases,Oxidored_q6,Poly_export,RNA_pol_Rpb1_5,SLBB,YXWGXW : 1
## Abhydrolase_2,Esterase_phd : 1
## Acetyltransf_1,Acetyltransf_7,DAO,DUF1858,DUF1990,DoxX_3,EF-hand_1,EF-hand_5,EF-hand_7,Epimerase,FAD_binding_6,Fer2,Fer2_4,Fer2_BFD,Fer4_11,Ferric_reduct,NAD_binding_1,NAD_binding_10,NAD_binding_4,NIR_SIR,NIR_SIR_ferr,NO_synthase,NiFe_hyd_SSU_C,NifU,NifU_N,NmrA,Oxidored_q6,Pyr_redox_2,Reductase_C,Rieske_2,SUKH_5,Semialdhyde_dh,SnoaL_4,SusD-like_3,SusD_RagB,TP_methylase,TrkA_N,cNMP_binding: 1
## Acylphosphatase,FGGY_C,HycI,Sua5_yciO_yrdC,zf-HYPF : 1
## Acylphosphatase,FGGY_C,Peptidase_M22,Sua5_yciO_yrdC,zf-HYPF : 1
## Acylphosphatase,Peptidase_M22,Sua5_yciO_yrdC,zf-HYPF : 1
## (Other) :62
## R2adj Coef pvallm corspear
## Min. :-0.03571 Min. :-1.018e-04 Min. :0.0000656 Min. :-0.6365
## 1st Qu.:-0.03311 1st Qu.:-1.980e-06 1st Qu.:0.2360188 1st Qu.:-0.2105
## Median :-0.01381 Median :-3.108e-08 Median :0.4432562 Median :-0.0942
## Mean : 0.02077 Mean :-4.017e-06 Mean :0.4807697 Mean :-0.0588
## 3rd Qu.: 0.01583 3rd Qu.: 2.704e-08 3rd Qu.:0.7924878 3rd Qu.: 0.1075
## Max. : 0.41941 Max. : 5.569e-05 Max. :0.9944291 Max. : 0.4498
##
## pvalspear pvallm.adj pvalspear.adj
## Min. :0.0000 Min. :0.004461 Min. :0.0000
## 1st Qu.:0.1010 1st Qu.:0.806988 1st Qu.:0.3686
## Median :0.3885 Median :0.873662 Median :0.7532
## Mean :0.3772 Mean :0.773306 Mean :0.5999
## 3rd Qu.:0.6000 3rd Qu.:0.969673 3rd Qu.:0.7564
## Max. :0.9880 Max. :0.994429 Max. :0.9880
##
# Some spearman might be worth investigating
Results_lm_FL[which(Results_lm_FL$pvalspear<0.05),] %>% arrange(desc(corspear)) #A few significant according to spearman pre-correction
## PFAM_ID R2adj
## 1 HMA,PB1,Sod_Cu,TPR_1,XPG_I_2 0.052903447
## 2 Sod_Ni 0.066270307
## 3 Complex1_30kDa,Complex1_49kDa,NiFeSe_Hases,Oxidored_q6 -0.006919077
## 4 Urease_gamma 0.093195141
## 5 Complex1_49kDa,NiFeSe_Hases 0.002600695
## 6 Complex1_30kDa,Complex1_49kDa,NiFeSe_Hases 0.273567894
## 7 NiFe-hyd_HybE -0.023604992
## 8 Glycos_trans_3N,UreE_C,UreE_N 0.253073478
## 9 Amidohydro_1,Urease_alpha,Urease_beta,Urease_gamma 0.419412752
## Coef pvallm corspear pvalspear pvallm.adj pvalspear.adj
## 1 1.721716e-06 1.167422e-01 0.4497786 0.022 0.610651531 0.1870000
## 2 3.981461e-05 9.128547e-02 0.4378836 0.022 0.517284303 0.1870000
## 3 -2.147331e-07 3.785046e-01 -0.3206927 0.048 0.830268202 0.3609231
## 4 -1.017815e-04 5.584680e-02 -0.3489698 0.020 0.379758258 0.1870000
## 5 -1.041466e-05 3.085543e-01 -0.4657873 0.004 0.806988141 0.0680000
## 6 -3.959465e-05 1.782399e-03 -0.5067443 0.001 0.050963816 0.0340000
## 7 -1.315814e-07 5.695261e-01 -0.5076382 0.007 0.926780457 0.0952000
## 8 -9.727454e-06 2.707013e-03 -0.5712114 0.003 0.050963816 0.0680000
## 9 -1.485475e-05 6.560013e-05 -0.6365281 0.000 0.004460809 0.0000000
Results_lm_FL[which(Results_lm_FL$pvalspear<0.05 & Results_lm_FL$corspear>0),] %>% arrange(desc(corspear)) # Only 2 have coeff >0 and none passes the adjustment
## PFAM_ID R2adj Coef pvallm corspear
## 1 HMA,PB1,Sod_Cu,TPR_1,XPG_I_2 0.05290345 1.721716e-06 0.11674220 0.4497786
## 2 Sod_Ni 0.06627031 3.981461e-05 0.09128547 0.4378836
## pvalspear pvallm.adj pvalspear.adj
## 1 0.022 0.6106515 0.187
## 2 0.022 0.5172843 0.187
ggplot() +
geom_smooth(aes(y=PFAM_matrix_FL[,"Sod_Ni"]*100,x=meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
geom_point(aes(y=PFAM_matrix_FL[,"Sod_Ni"]*100,x=meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_FL$MertzGlacier), size=3) +
theme_bw() +
labs(y="Sod_Ni", x=expression(delta^60 * Ni ~ "(‰)")) +
theme(text = element_text(size = 16))
# Seems nice, maybe significant without Mertz
ggplot() +
geom_smooth(aes(y=PFAM_matrix_FL[,"HMA,PB1,Sod_Cu,TPR_1,XPG_I_2"]*100,x=meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
geom_point(aes(y=PFAM_matrix_FL[,"HMA,PB1,Sod_Cu,TPR_1,XPG_I_2"]*100,x=meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_FL$MertzGlacier), size=3) +
theme_bw() +
labs(y="HMA,PB1,Sod_Cu,TPR_1,XPG_I_2", x=expression(delta^60 * Ni ~ "(‰)")) +
theme(text = element_text(size = 16))
# Not particularly high in station 8/11
# Again, mertz seems to disturb the relationship. Let's try without it :
Results_lm_FL=data.frame(PFAM_ID=c("PFAM_ID"),R2adj=c(0.0),Coef=c(0.0), pvallm=c(0.0), corspear=c(0.0),pvalspear=c(0.0))
PFAM_matrix_FL_nomertz=PFAM_matrix_FL[match(meta_nickel_FL_nomertz$ACE_seq_name,row.names(PFAM_matrix_FL)),]
for (i in c(1:ncol(PFAM_matrix_FL_nomertz))) {
res=lm(PFAM_matrix_FL_nomertz[,i]~meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE)
agc_id=names(PFAM_matrix_FL_nomertz)[i]
R2=summary(res)$adj.r.squared
pvallm=lmp(res)
slope=res$coefficients[2]
corspear=cor(PFAM_matrix_FL_nomertz[,i],meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE,method = "spearman")
pvalspear=perm_test(PFAM_matrix_FL_nomertz[,i],meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE,method = "Spearman", alternative = "two.sided")$p.value
Results_lm_FL[i,]=c(agc_id,R2,slope,pvallm,corspear,pvalspear)
}
Results_lm_FL$PFAM_ID=as.factor(Results_lm_FL$PFAM_ID)
Results_lm_FL$R2adj=as.numeric(Results_lm_FL$R2adj)
Results_lm_FL$Coef=as.numeric(Results_lm_FL$Coef)
Results_lm_FL$pvallm=as.numeric(Results_lm_FL$pvallm)
Results_lm_FL$pvallm.adj=p.adjust(Results_lm_FL$pvallm, method="BH")
Results_lm_FL$corspear=as.numeric(Results_lm_FL$corspear)
Results_lm_FL$pvalspear=as.numeric(Results_lm_FL$pvalspear)
Results_lm_FL$pvalspear.adj=p.adjust(Results_lm_FL$pvalspear, method="BH")
summary(Results_lm_FL)
## PFAM_ID
## AAA_11,AAA_12,ABC_membrane,ABC_tran,ACC_central,Biotin_carb_C,Biotin_carb_N,Biotin_lipoyl,Biotin_lipoyl_2,CPSase_L_D2,Carboxyl_trans,Complex1_49kDa,DUF3347,DUF4347,DUF559,HlyD_3,HlyD_D23,NiFeSe_Hases,Oxidored_q6,Poly_export,RNA_pol_Rpb1_5,SLBB,YXWGXW : 1
## Abhydrolase_2,Esterase_phd : 1
## Acetyltransf_1,Acetyltransf_7,DAO,DUF1858,DUF1990,DoxX_3,EF-hand_1,EF-hand_5,EF-hand_7,Epimerase,FAD_binding_6,Fer2,Fer2_4,Fer2_BFD,Fer4_11,Ferric_reduct,NAD_binding_1,NAD_binding_10,NAD_binding_4,NIR_SIR,NIR_SIR_ferr,NO_synthase,NiFe_hyd_SSU_C,NifU,NifU_N,NmrA,Oxidored_q6,Pyr_redox_2,Reductase_C,Rieske_2,SUKH_5,Semialdhyde_dh,SnoaL_4,SusD-like_3,SusD_RagB,TP_methylase,TrkA_N,cNMP_binding: 1
## Acylphosphatase,FGGY_C,HycI,Sua5_yciO_yrdC,zf-HYPF : 1
## Acylphosphatase,FGGY_C,Peptidase_M22,Sua5_yciO_yrdC,zf-HYPF : 1
## Acylphosphatase,Peptidase_M22,Sua5_yciO_yrdC,zf-HYPF : 1
## (Other) :62
## R2adj Coef pvallm corspear
## Min. :-0.03995 Min. :-1.282e-04 Min. :0.000219 Min. :-0.57852
## 1st Qu.:-0.03633 1st Qu.:-1.191e-06 1st Qu.:0.147754 1st Qu.:-0.24457
## Median :-0.02600 Median :-1.265e-08 Median :0.564541 Median :-0.05276
## Mean : 0.02738 Mean :-8.785e-06 Mean :0.482931 Mean :-0.01712
## 3rd Qu.: 0.04522 3rd Qu.: 1.165e-07 3rd Qu.:0.768658 3rd Qu.: 0.12981
## Max. : 0.40412 Max. : 8.104e-05 Max. :0.972520 Max. : 0.72541
##
## pvalspear pvallm.adj pvalspear.adj
## Min. :0.0000 Min. :0.01489 Min. :0.0000
## 1st Qu.:0.0640 1st Qu.:0.56254 1st Qu.:0.2447
## Median :0.2455 Median :0.95037 Median :0.4833
## Mean :0.3581 Mean :0.74105 Mean :0.5199
## 3rd Qu.:0.6020 3rd Qu.:0.95037 3rd Qu.:0.7951
## Max. :0.9970 Max. :0.97252 Max. :0.9970
##
# Some spearman might be worth investigating
Results_lm_FL[which(Results_lm_FL$pvalspear<0.05),] %>% arrange(desc(corspear)) #14 significant according to spearman pre-correction, highest coeff = LysE,UDG and SOD_Ni
## PFAM_ID
## 1 LysE,UDG
## 2 Sod_Ni
## 3 HMA,PB1,Sod_Cu,TPR_1,XPG_I_2
## 4 HupE_UreJ
## 5 UreD,UreF
## 6 HMA,Sod_Cu
## 7 Amidohydro_1,DUF599,Fungal_trans,HMG_box,Urease_alpha,Urease_beta,Urease_gamma
## 8 Acetyltransf_1,Acetyltransf_7,DAO,DUF1858,DUF1990,DoxX_3,EF-hand_1,EF-hand_5,EF-hand_7,Epimerase,FAD_binding_6,Fer2,Fer2_4,Fer2_BFD,Fer4_11,Ferric_reduct,NAD_binding_1,NAD_binding_10,NAD_binding_4,NIR_SIR,NIR_SIR_ferr,NO_synthase,NiFe_hyd_SSU_C,NifU,NifU_N,NmrA,Oxidored_q6,Pyr_redox_2,Reductase_C,Rieske_2,SUKH_5,Semialdhyde_dh,SnoaL_4,SusD-like_3,SusD_RagB,TP_methylase,TrkA_N,cNMP_binding
## 9 UreE_C,UreE_N
## 10 Sod_Fe_C,Sod_Fe_N
## 11 Urease_gamma
## 12 Complex1_30kDa,Complex1_49kDa,NiFeSe_Hases
## 13 Complex1_49kDa,NiFeSe_Hases
## 14 NiFe-hyd_HybE
## 15 Glycos_trans_3N,UreE_C,UreE_N
## 16 Amidohydro_1,Urease_alpha,Urease_beta,Urease_gamma
## R2adj Coef pvallm corspear pvalspear pvallm.adj
## 1 0.331866595 3.962379e-05 0.0009872402 0.7254111 0.002 0.02981513
## 2 0.317265809 8.103740e-05 0.0013153735 0.6935922 0.000 0.02981513
## 3 0.151121519 2.938053e-06 0.0256758123 0.6319618 0.001 0.19309924
## 4 0.145079670 3.434406e-05 0.0283969464 0.5097153 0.013 0.19309924
## 5 0.111128767 7.398130e-06 0.0497784818 0.5055999 0.010 0.30772152
## 6 0.078478490 3.655282e-06 0.0851109119 0.4807996 0.012 0.41953053
## 7 0.098908907 6.587840e-06 0.0608471438 0.4105870 0.034 0.34480048
## 8 0.019805370 8.241820e-08 0.2282943965 0.3643920 0.040 0.73923900
## 9 0.046628547 -1.063562e-04 0.1442940943 -0.3136003 0.049 0.56253836
## 10 0.037648507 -1.045910e-04 0.1678821425 -0.3224729 0.032 0.60084135
## 11 0.077583070 -1.135822e-04 0.0863739331 -0.3854989 0.018 0.41953053
## 12 0.209854816 -4.026930e-05 0.0094478715 -0.3965132 0.021 0.09177932
## 13 -0.008788139 -1.100548e-05 0.3875126796 -0.4149795 0.021 0.87836207
## 14 -0.032870989 -1.188045e-07 0.6813962508 -0.4866966 0.018 0.95036552
## 15 0.248018950 -1.128776e-05 0.0048073750 -0.5446816 0.009 0.06538030
## 16 0.404117117 -1.660958e-05 0.0002190093 -0.5785237 0.007 0.01489263
## pvalspear.adj
## 1 0.04533333
## 2 0.00000000
## 3 0.03400000
## 4 0.11050000
## 5 0.11050000
## 6 0.11050000
## 7 0.16514286
## 8 0.18133333
## 9 0.20800000
## 10 0.16514286
## 11 0.11900000
## 12 0.11900000
## 13 0.11900000
## 14 0.11900000
## 15 0.11050000
## 16 0.11050000
Results_lm_FL[which(Results_lm_FL$pvalspear<0.05 & Results_lm_FL$corspear>0),] %>% arrange(desc(corspear)) # 7 have coeff >0 and 3 passes the adjustment
## PFAM_ID
## 1 LysE,UDG
## 2 Sod_Ni
## 3 HMA,PB1,Sod_Cu,TPR_1,XPG_I_2
## 4 HupE_UreJ
## 5 UreD,UreF
## 6 HMA,Sod_Cu
## 7 Amidohydro_1,DUF599,Fungal_trans,HMG_box,Urease_alpha,Urease_beta,Urease_gamma
## 8 Acetyltransf_1,Acetyltransf_7,DAO,DUF1858,DUF1990,DoxX_3,EF-hand_1,EF-hand_5,EF-hand_7,Epimerase,FAD_binding_6,Fer2,Fer2_4,Fer2_BFD,Fer4_11,Ferric_reduct,NAD_binding_1,NAD_binding_10,NAD_binding_4,NIR_SIR,NIR_SIR_ferr,NO_synthase,NiFe_hyd_SSU_C,NifU,NifU_N,NmrA,Oxidored_q6,Pyr_redox_2,Reductase_C,Rieske_2,SUKH_5,Semialdhyde_dh,SnoaL_4,SusD-like_3,SusD_RagB,TP_methylase,TrkA_N,cNMP_binding
## R2adj Coef pvallm corspear pvalspear pvallm.adj
## 1 0.33186660 3.962379e-05 0.0009872402 0.7254111 0.002 0.02981513
## 2 0.31726581 8.103740e-05 0.0013153735 0.6935922 0.000 0.02981513
## 3 0.15112152 2.938053e-06 0.0256758123 0.6319618 0.001 0.19309924
## 4 0.14507967 3.434406e-05 0.0283969464 0.5097153 0.013 0.19309924
## 5 0.11112877 7.398130e-06 0.0497784818 0.5055999 0.010 0.30772152
## 6 0.07847849 3.655282e-06 0.0851109119 0.4807996 0.012 0.41953053
## 7 0.09890891 6.587840e-06 0.0608471438 0.4105870 0.034 0.34480048
## 8 0.01980537 8.241820e-08 0.2282943965 0.3643920 0.040 0.73923900
## pvalspear.adj
## 1 0.04533333
## 2 0.00000000
## 3 0.03400000
## 4 0.11050000
## 5 0.11050000
## 6 0.11050000
## 7 0.16514286
## 8 0.18133333
ggplot() +
geom_smooth(aes(y=PFAM_matrix_FL_nomertz[,"Sod_Ni"]*100,x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
geom_point(aes(y=PFAM_matrix_FL_nomertz[,"Sod_Ni"]*100,x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE), size=3) +
theme_bw() +
labs(y="Sod_Ni", x=expression(delta^60 * Ni ~ "(‰)")) +
theme(text = element_text(size = 16))
ggplot() +
geom_smooth(aes(y=PFAM_matrix_FL_nomertz[,"LysE,UDG"]*100,x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
geom_point(aes(y=PFAM_matrix_FL_nomertz[,"LysE,UDG"]*100,x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE), size=3) +
theme_bw() +
labs(y="LysE,UDG", x=expression(delta^60 * Ni ~ "(‰)")) +
theme(text = element_text(size = 16))
# ATT size fraction
Results_lm_ATT=data.frame(PFAM_ID=c("PFAM_ID"),R2adj=c(0.0),Coef=c(0.0), pvallm=c(0.0), corspear=c(0.0),pvalspear=c(0.0))
# We need to match orders of row names before computing any stats :
PFAM_matrix_ATT=PFAM_matrix_ATT[match(meta_nickel_ATT$ACE_seq_name,row.names(PFAM_matrix_ATT)),]
for (i in c(1:ncol(PFAM_matrix_ATT))) {
res=lm(PFAM_matrix_ATT[,i]~meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE)
agc_id=names(PFAM_matrix_ATT)[i]
R2=summary(res)$adj.r.squared
pvallm=lmp(res)
slope=res$coefficients[2]
corspear=cor(PFAM_matrix_ATT[,i],meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE,method = "spearman")
pvalspear=perm_test(PFAM_matrix_ATT[,i],meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE,method = "Spearman", alternative = "two.sided")$p.value
Results_lm_ATT[i,]=c(agc_id,R2,slope,pvallm,corspear,pvalspear)
}
Results_lm_ATT$PFAM_ID=as.factor(Results_lm_ATT$PFAM_ID)
Results_lm_ATT$R2adj=as.numeric(Results_lm_ATT$R2adj)
Results_lm_ATT$Coef=as.numeric(Results_lm_ATT$Coef)
Results_lm_ATT$pvallm=as.numeric(Results_lm_ATT$pvallm)
Results_lm_ATT$pvallm.adj=p.adjust(Results_lm_ATT$pvallm, method="BH")
Results_lm_ATT$corspear=as.numeric(Results_lm_ATT$corspear)
Results_lm_ATT$pvalspear=as.numeric(Results_lm_ATT$pvalspear)
Results_lm_ATT$pvalspear.adj=p.adjust(Results_lm_ATT$pvalspear, method="BH")
summary(Results_lm_ATT)
## PFAM_ID
## Acetyltransf_1,Acetyltransf_7,DAO,DUF1858,DUF1990,DoxX_3,EF-hand_1,EF-hand_5,EF-hand_7,Epimerase,FAD_binding_6,Fer2,Fer2_4,Fer2_BFD,Fer4_11,Ferric_reduct,NAD_binding_1,NAD_binding_10,NAD_binding_4,NIR_SIR,NIR_SIR_ferr,NO_synthase,NiFe_hyd_SSU_C,NifU,NifU_N,NmrA,Oxidored_q6,Pyr_redox_2,Reductase_C,Rieske_2,SUKH_5,Semialdhyde_dh,SnoaL_4,SusD-like_3,SusD_RagB,TP_methylase,TrkA_N,cNMP_binding: 1
## Acylphosphatase,FGGY_C,HycI,Sua5_yciO_yrdC,zf-HYPF : 1
## Acylphosphatase,FGGY_C,Peptidase_M22,Sua5_yciO_yrdC,zf-HYPF : 1
## Acylphosphatase,Sua5_yciO_yrdC,zf-HYPF : 1
## Amidohydro_1,DUF599,Fungal_trans,HMG_box,Urease_alpha,Urease_beta,Urease_gamma : 1
## Amidohydro_1,Urease_alpha,Urease_beta,Urease_gamma : 1
## (Other) :51
## R2adj Coef pvallm corspear
## Min. :-0.06249 Min. :-1.864e-05 Min. :0.0003622 Min. :-0.5449
## 1st Qu.:-0.03226 1st Qu.:-8.858e-07 1st Qu.:0.1535460 1st Qu.:-0.2940
## Median :-0.00151 Median :-1.757e-07 Median :0.3382864 Median :-0.1169
## Mean : 0.03294 Mean : 2.645e-06 Mean :0.3688405 Mean :-0.0724
## 3rd Qu.: 0.06822 3rd Qu.: 1.429e-06 3rd Qu.:0.5033539 3rd Qu.: 0.1398
## Max. : 0.53124 Max. : 3.405e-05 Max. :0.9929004 Max. : 0.7180
##
## pvalspear pvallm.adj pvalspear.adj
## Min. :0.0000 Min. :0.02065 Min. :0.0000
## 1st Qu.:0.1050 1st Qu.:0.53334 1st Qu.:0.3856
## Median :0.3160 Median :0.64327 Median :0.5928
## Mean :0.3667 Mean :0.62430 Mean :0.5769
## 3rd Qu.:0.5850 3rd Qu.:0.66090 3rd Qu.:0.7755
## Max. :0.9320 Max. :0.99290 Max. :0.9320
##
# Some spearman might be worth investigating
Results_lm_ATT[which(Results_lm_ATT$pvalspear<0.05),] %>% arrange(desc(corspear)) #A few significant according to spearman pre-correction
## PFAM_ID
## 1 UreD,UreF
## 2 Acylphosphatase,FGGY_C,HycI,Sua5_yciO_yrdC,zf-HYPF
## 3 Amidohydro_1,Urease_alpha,Urease_beta,Urease_gamma
## 4 HMA,Sod_Cu
## 5 Complex1_30kDa,Complex1_49kDa,Methyltransf_28,NiFeSe_Hases,Oxidored_q6
## R2adj Coef pvallm corspear pvalspear pvallm.adj
## 1 0.53124021 2.306228e-05 0.0003622142 0.7179752 0.006 0.02064621
## 2 0.06822015 -5.821426e-08 0.1535460437 -0.3975649 0.000 0.53334166
## 3 0.09526412 -2.677682e-07 0.1142963274 -0.4879092 0.023 0.53334166
## 4 0.16172942 -8.715604e-07 0.0551235463 -0.4947732 0.017 0.49741488
## 5 0.01849502 -2.419457e-06 0.2674190909 -0.5449049 0.003 0.64327267
## pvalspear.adj
## 1 0.11400
## 2 0.00000
## 3 0.26220
## 4 0.24225
## 5 0.08550
Results_lm_ATT[which(Results_lm_ATT$pvalspear<0.05 & Results_lm_ATT$corspear>0),] %>% arrange(desc(corspear)) # Only 1 have coeff >0 and none passes the adjustment
## PFAM_ID R2adj Coef pvallm corspear pvalspear pvallm.adj
## 1 UreD,UreF 0.5312402 2.306228e-05 0.0003622142 0.7179752 0.006 0.02064621
## pvalspear.adj
## 1 0.114
ggplot() +
geom_smooth(aes(y=PFAM_matrix_ATT[,"UreD,UreF"]*100,x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
geom_point(aes(y=PFAM_matrix_ATT[,"UreD,UreF"]*100,x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_ATT$MertzGlacier), size=3) +
theme_bw() +
labs(y="UreD,UreF", x=expression(delta^60 * Ni ~ "(‰)")) +
theme(text = element_text(size = 16))
# The most interesting can go into figures :
g5 <- ggplot() +
geom_smooth(aes(y=PFAM_matrix_FL_nomertz[,"Sod_Ni"]*100,x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2, col="grey20") +
geom_point(aes(y=PFAM_matrix_FL[,"Sod_Ni"]*100,x=meta_nickel_FL[, "Ni_60_58_D_DELTA_BOTTLE"], shape=meta_nickel_FL[, "MertzGlacier"], col=meta_nickel_FL[, "MertzGlacier"]), size=3) +
scale_color_manual(values=c("black","skyblue")) +
theme_bw() +
labs(y="Sod_Ni\n(PFAM annotation, % of coverage)", x=expression(delta^60 * Ni ~ "(‰)"), shape= "Mertz Glacier", col="Mertz Glacier") +
theme(text = element_text(size = 16))
g5
g6 <- ggplot() +
geom_smooth(aes(y=PFAM_matrix_ATT[,"UreD,UreF"]*100,x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2, col="grey20") +
geom_point(aes(y=PFAM_matrix_ATT[,"UreD,UreF"]*100,x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_ATT$MertzGlacier), size=3) +
theme_bw() +
labs(y="UreD,UreF\n(PFAM annotation, % of coverage)", x=expression(delta^60 * Ni ~ "(‰)"), shape= "Mertz Glacier") +
theme(text = element_text(size = 16))
g6
# Load AGC abundances
GeneMat = read.table("/Users/emifaure/Documents/ACE/NolwennCollab/Post_Review/GM_AGN_TMAX60GQ_transposed_Metallo-without-nickelgenes_rCLR-transformed.tsv", sep="\t")
# If using AGC table non rCLR transformed :
#AGCList_withtoutgeneralist=read.table("/Users/emifaure/Documents/ACE/NolwennCollab/AGC_Metallo_WithoutNickelGenes.tsv", sep="\t", header=F)
#GeneMat=GeneMat[,which(names(GeneMat) %in% AGCList_withtoutgeneralist$V1)]
GeneMat_Nickel = GeneMat[row.names(GeneMat) %in% meta_nickel$ACE_seq_name,]
GeneMat_Nickel_FL = GeneMat[row.names(GeneMat) %in% meta_nickel_FL$ACE_seq_name,]
GeneMat_Nickel_FL=GeneMat_Nickel_FL[match(meta_nickel_FL$ACE_seq_name,row.names(GeneMat_Nickel_FL)),]
#Remove columns of only 0
GeneMat_Nickel_FL = GeneMat_Nickel_FL[,-which(colSums(GeneMat_Nickel_FL)==0)]
GeneMat_Nickel_ATT = GeneMat[row.names(GeneMat) %in% meta_nickel_ATT$ACE_seq_name,]
GeneMat_Nickel_ATT=GeneMat_Nickel_ATT[match(meta_nickel_ATT$ACE_seq_name,row.names(GeneMat_Nickel_ATT)),]
#Remove columns of only 0
GeneMat_Nickel_ATT = GeneMat_Nickel_ATT[,-which(colSums(GeneMat_Nickel_ATT)==0)]
# We want to associate taxonomy to each AGC, load nickel-related contigs taxo :
Taxopos=read.table('/Users/emifaure/Documents/ACE/NolwennCollab/Post_Review/Taxonomy_Uniref90_Contigs_metallo_WithoutNickelGenes_WithKrakenLineage.tsv', sep="\t", header=F,fill=T, quote = '', na.strings="")
# Information in column 6 is redundant with column 5
Taxopos=Taxopos[,-6]
names(Taxopos)=c("Contig","Seed","HighestRank","HighestAnnot_uniref","Annot_uniref","Annot_gtdb")
Taxopos=separate_wider_delim(Taxopos, cols = Annot_uniref, delim = ";", names = c("Species_uniref","Genus_uniref","Family_uniref","Order_uniref",
"Class_uniref","Phylum_uniref","Domain_uniref","Root_uniref"))
Taxopos=separate_wider_delim(Taxopos, cols = Annot_gtdb, delim = ";", names = c("Root_gtdb","Domain_gtdb","Phylum_gtdb","Class_gtdb","Order_gtdb","Family_gtdb","Genus_gtdb","Species_gtdb"), too_few = c("align_start"))
# Needs cleaning
Taxopos=select(Taxopos, -c("Seed","Root_gtdb","Root_uniref"))
Genes=AnnotFull
Genes$Contigs_ID=sub("_[^_]+$", "", Genes$Gene_ID)
Genes$Contigs_ID=gsub("ACE_", "", Genes$Contigs_ID)
Genes_Taxo_Annot=merge(Genes,Taxopos,by.x="Contigs_ID", by.y="Contig")
Genes_Taxo_Annot <- Genes_Taxo_Annot %>% replace_with_na_all(condition = ~.x == "")
Genes_Taxo_Annot <- Genes_Taxo_Annot %>% mutate_if(is.character,as.factor)
Genes_Taxo_Annot<-as.data.frame(Genes_Taxo_Annot)
summary(Genes_Taxo_Annot)
## Contigs_ID Gene_ID AGC_ID
## G105_260239: 9 ACE_G1_100688_153808264: 1 AGC_7792447 : 2463
## G142_206063: 9 ACE_G1_102611_4772586 : 1 AGC_3476410 : 1676
## G101_245639: 8 ACE_G1_104559_74923114 : 1 AGC_15613456: 1334
## G102_143898: 8 ACE_G1_108198_66157108 : 1 AGC_8803290 : 1230
## G103_22249 : 8 ACE_G1_110264_118743107: 1 AGC_27255931: 816
## G106_115049: 8 ACE_G1_112699_74923939 : 1 AGC_1382956 : 799
## (Other) :29837 (Other) :29881 (Other) :21569
## AGC_Rep AGC_Size AGC_Cat
## ACE_G36_000000069010_49679286 : 2463 Min. : 1.0 DISC : 7648
## ACE_G152_000000097994_90230753: 1676 1st Qu.: 50.0 EU : 229
## ACE_G174_000000033486_3784048 : 1334 Median : 267.0 GU : 1827
## ACE_G86_000000123516_86804048 : 1230 Mean : 766.8 K :17826
## ACE_G91_000000108792_139750272: 816 3rd Qu.: 1230.0 KWP : 1635
## ACE_G20_000000033145_127731905: 799 Max. :14756.0 SINGL: 722
## (Other) :21569
## Singl_Cat CDHit_Rep Domain
## EU : 67 ACE_G101_69959_35742 : 118 Eukaryote : 1337
## GU : 171 ACE_G103_122771_145070 : 102 Prokaryote:28550
## K : 210 ACE_G6_41384_85802909 : 97
## KWP: 274 ACE_G94_172308_148699120: 84
## NA :29165 ACE_G162_58692_11784686 : 83
## ACE_G216_112674_5147263 : 83
## (Other) :29320
## KEGG seed_ortholog
## K04564 : 5267 666509.RCA23_c18480 : 712
## K03188 : 2355 2903.EOD32519 : 453
## K03190 : 2241 643867.Ftrac_3655 : 453
## K03187 : 2139 1189620.AJXL01000002_gene1866: 426
## K03189 : 1604 1131266.ARWQ01000023_gene389 : 325
## (Other): 1438 1131266.ARWQ01000023_gene386 : 303
## NA's :14843 (Other) :27215
## eggNOG_OGs
## COG0605@1|root,COG0605@2|Bacteria,1MVW2@1224|Proteobacteria,2TU3T@28211|Alphaproteobacteria : 1475
## COG0804@1|root,2QPKB@2759|Eukaryota : 894
## COG0605@1|root,COG0605@2|Bacteria,1MVW2@1224|Proteobacteria,2TU3T@28211|Alphaproteobacteria,4BQ03@82117|unclassified Alphaproteobacteria: 597
## COG2032@1|root,COG2032@2|Bacteria : 593
## 2A42S@1|root,30SMP@2|Bacteria : 532
## arCOG06188@1|root,arCOG06188@2157|Archaea,41STJ@651137|Thaumarchaeota : 522
## (Other) :25274
## narr_OG_name narr_OG_cat
## 2TU3T@28211|Alphaproteobacteria : 1475 O :9291
## 2QPKB@2759|Eukaryota : 894 P :6244
## 4BQ03@82117|unclassified Alphaproteobacteria: 597 E :4020
## COG2032@2|Bacteria : 593 S :3096
## 30SMP@2|Bacteria : 532 C :3051
## 41STJ@651137|Thaumarchaeota : 522 KO :2572
## (Other) :25274 (Other):1613
## narr_OG_desc
## Destroys radicals which are normally produced within the cells and which are toxic to biological systems : 5382
## Required for maturation of urease via the functional incorporation of the urease nickel metallocenter : 4321
## Facilitates the functional incorporation of the urease nickel metallocenter. This process requires GTP hydrolysis, probably effectuated by UreG: 2561
## Nickel-containing superoxide dismutase : 2439
## Involved in urease metallocenter assembly. Binds nickel. Probably functions as a nickel donor during metallocenter assembly : 1874
## superoxide dismutase activity : 1638
## (Other) :11672
## best_OG_name best_OG_cat
## 2TU3T@28211|Alphaproteobacteria: 2942 O :9613
## 4NGGW@976|Bacteroidetes : 1022 P :7591
## 2QPKB@2759|Eukaryota : 894 E :4020
## 4NDZ4@976|Bacteroidetes : 872 S :3093
## COG2032@2|Bacteria : 593 KO :2744
## 1G5DA@1117|Cyanobacteria : 571 (Other):2818
## (Other) :22993 NA's : 8
## best_OG_desc
## Destroys radicals which are normally produced within the cells and which are toxic to biological systems : 5388
## Required for maturation of urease via the functional incorporation of the urease nickel metallocenter : 4437
## Facilitates the functional incorporation of the urease nickel metallocenter. This process requires GTP hydrolysis, probably effectuated by UreG: 2625
## Involved in urease metallocenter assembly. Binds nickel. Probably functions as a nickel donor during metallocenter assembly : 1933
## Nickel-containing superoxide dismutase : 1593
## (Other) :13903
## NA's : 8
## Preferred_name CAZy
## sodB :3627 NA's:29887
## ureG :2614
## ureF :2308
## ureD :2274
## ureE :2009
## (Other):8221
## NA's :8834
## BiGG_Reaction
## iJN678.ureG : 267
## iECH74115_1262.ECH74115_1322,iECO103_1326.ECO103_3798,iECO111_1330.ECO111_1228,iECSP_1301.ECSP_1250,iECs_1301.ECs1323,iUMNK88_1353.UMNK88_1200,iZ_1308.Z1144,iZ_1308.Z1583 : 179
## iECH74115_1262.ECH74115_1321,iECO103_1326.ECO103_3797,iECO111_1330.ECO111_1227,iECO26_1355.ECO26_1280,iECSP_1301.ECSP_1249,iECs_1301.ECs1322,iUMNK88_1353.UMNK88_1199,iZ_1308.Z1143,iZ_1308.Z1582 : 147
## iAPECO1_1312.APECO1_76,iUTI89_1310.UTI89_C1040 : 84
## iAF1260.b1656,iAPECO1_1312.APECO1_736,iB21_1397.B21_01616,iBWG_1329.BWG_1471,iE2348C_1286.E2348C_1742,iEC042_1314.EC042_1825,iEC55989_1330.EC55989_1824,iECABU_c1320.ECABU_c19090,iECBD_1354.ECBD_1987,iECB_1328.ECB_01626,iECDH10B_1368.ECDH10B_1790,iECDH1ME8569_1439.ECDH1ME8569_1601,iECD_1391.ECD_01626,iECED1_1282.ECED1_1855,iECH74115_1262.ECH74115_2368,iECIAI1_1343.ECIAI1_1708,iECO103_1326.ECO103_1797,iECO111_1330.ECO111_2126,iECO26_1355.ECO26_2385,iECOK1_1307.ECOK1_1775,iECP_1309.ECP_1601,iECS88_1305.ECS88_1705,iECSE_1348.ECSE_1780,iECSP_1301.ECSP_2222,iECUMN_1333.ECUMN_1946,iECW_1372.ECW_m1823,iECs_1301.ECs2365,iEKO11_1354.EKO11_2118,iETEC_1333.ETEC_1691,iEcDH1_1363.EcDH1_1984,iEcE24377_1341.EcE24377A_1869,iEcHS_1320.EcHS_A1735,iEcSMS35_1347.EcSMS35_1542,iEcolC_1368.EcolC_1973,iG2583_1286.G2583_2051,iJO1366.b1656,iJR904.b1656,iLF82_1304.LF82_2148,iNRG857_1313.NRG857_08300,iSBO_1134.SBO_1475,iSDY_1059.SDY_1882,iSFV_1184.SFV_1678,iSF_1195.SF1684,iSFxv_1172.SFxv_1892,iSSON_1240.SSON_1500,iS_1188.S1816,iSbBS512_1146.SbBS512_E1853,iUMN146_1321.UM146_08870,iUMNK88_1353.UMNK88_2117,iUTI89_1310.UTI89_C1847,iWFL_1372.ECW_m1823,iY75_1357.Y75_RS08680,iZ_1308.Z2678,ic_1306.c2050: 80
## (Other) : 165
## NA's :28965
## PFAMs FuncSimplify HighestRank
## Sod_Fe_C,Sod_Fe_N: 5193 NiFe Hydrogenase: 1739 species :12032
## cobW : 3216 SOD :11180 no rank : 8908
## UreF : 3045 Urease :16968 phylum : 2195
## UreE_C,UreE_N : 2720 superkingdom: 1866
## UreD : 2700 order : 1208
## (Other) :12810 class : 1179
## NA's : 203 (Other) : 2499
## HighestAnnot_uniref Species_uniref
## root : 6532 unknown : 7758
## Bacteria : 1458 uc_Bacteria : 1458
## unclassified : 1198 uc_Thaumarchaeota : 1023
## Thaumarchaeota : 1023 Phaeocystis antarctica: 818
## Phaeocystis antarctica: 818 uc_Proteobacteria : 740
## Proteobacteria : 740 (Other) :16892
## (Other) :18118 NA's : 1198
## Genus_uniref Family_uniref Order_uniref
## unknown :14592 unknown :12382 unknown :10929
## uc_Bacteria : 1458 uc_Bacteria : 1458 Rhodobacterales : 2119
## uc_Thaumarchaeota: 1023 uc_Thaumarchaeota: 1023 uc_Bacteria : 1458
## Phaeocystis : 818 Rhodobacteraceae : 988 Flavobacteriales : 1310
## uc_Proteobacteria: 740 Phaeocystaceae : 818 uc_Thaumarchaeota: 1023
## (Other) :10058 (Other) :12020 (Other) :11850
## NA's : 1198 NA's : 1198 NA's : 1198
## Class_uniref Phylum_uniref Domain_uniref
## unknown :10746 Proteobacteria:9810 unknown :25944
## Alphaproteobacteria: 5259 unknown :7936 uc_Bacteria : 1458
## Gammaproteobacteria: 2839 Thaumarchaeota:1635 Viridiplantae: 532
## uc_Bacteria : 1458 Bacteroidetes :1594 uc_Eukaryota : 223
## Flavobacteriia : 1311 uc_Bacteria :1458 uc_Archaea : 185
## (Other) : 7076 (Other) :6256 (Other) : 347
## NA's : 1198 NA's :1198 NA's : 1198
## Domain_gtdb Phylum_gtdb Class_gtdb
## Archaea : 3225 Pseudomonadota:15692 Alphaproteobacteria:8665
## Bacteria:26048 Thermoproteota: 2449 Gammaproteobacteria:6901
## NA's : 614 Bacteroidota : 2141 Nitrososphaeria :2448
## Nitrospinota : 1418 Bacteroidia :2122
## Actinomycetota: 1162 Nitrospinia :1417
## (Other) : 4969 (Other) :6116
## NA's : 2056 NA's :2218
## Order_gtdb Family_gtdb
## Rhodobacterales : 4618 Rhodobacteraceae : 4618
## Pseudomonadales : 2945 Nitrosopumilaceae: 2448
## Nitrososphaerales: 2448 VA-1 : 1416
## Nitrospinales : 1417 Alteromonadaceae : 1147
## Flavobacteriales : 1404 Flavobacteriaceae: 903
## (Other) :13951 (Other) :15985
## NA's : 3104 NA's : 3370
## Genus_gtdb Species_gtdb
## Planktomarina : 1510 Psychrobacter sp000511655 : 571
## Nitrosopelagicus: 1364 Planktomarina sp028822275 : 499
## Nitrosopumilus : 1081 Octadecabacter sp028825445: 451
## Nitromaritima : 785 Planktomarina sp905182195 : 446
## Psychrobacter : 602 GCA-2721545 sp905612245 : 408
## (Other) :20720 (Other) :22738
## NA's : 3825 NA's : 4774
# Finally, switch to AGC level (which will oversimplify but for visuals it's the only option)
AGC_Taxo_Annot = Genes_Taxo_Annot %>% select(c(AGC_ID,"Species_uniref","Genus_uniref","Family_uniref","Order_uniref",
"Class_uniref","Phylum_uniref","Domain_uniref","Domain_gtdb","Phylum_gtdb",
"Class_gtdb","Order_gtdb","Family_gtdb","Genus_gtdb","Species_gtdb"))
Mode <- function(x) {
ux <- na.omit(unique(x))
ux[which.max(tabulate(match(x, ux)))]
}
AGC_Taxo_Annot = AGC_Taxo_Annot %>%
group_by(AGC_ID) %>%
summarise_all(Mode)
summary(AGC_Taxo_Annot)
## AGC_ID Species_uniref Genus_uniref
## AGC_10001997: 1 unknown : 271 unknown :887
## AGC_10048680: 1 Phaeocystis antarctica : 98 Phaeocystis: 97
## AGC_10052113: 1 uc_Bacteria : 54 Aureococcus: 49
## AGC_1008101 : 1 Aureococcus anophagefferens: 50 uc_Bacteria: 45
## AGC_10118919: 1 Euryarchaeota archaeon : 49 Bathycoccus: 31
## AGC_1012388 : 1 (Other) :1539 (Other) :952
## (Other) :2142 NA's : 87 NA's : 87
## Family_uniref Order_uniref Class_uniref
## unknown : 713 unknown : 553 unknown :626
## Phaeocystaceae : 97 Flavobacteriales: 154 Alphaproteobacteria:273
## Flavobacteriaceae: 96 Rhodobacterales : 98 Gammaproteobacteria:185
## uc_Bacteria : 48 Phaeocystales : 97 Flavobacteriia :152
## Alteromonadaceae : 42 Mamiellales : 75 Mamiellophyceae : 75
## (Other) :1065 (Other) :1084 (Other) :750
## NA's : 87 NA's : 87 NA's : 87
## Phylum_uniref Domain_uniref Domain_gtdb
## Proteobacteria:598 unknown :1787 Archaea : 64
## unknown :314 Viridiplantae: 93 Bacteria:1977
## Bacteroidetes :181 Metazoa : 67 NA's : 107
## Haptophyta :181 uc_Bacteria : 41
## Chlorophyta : 79 Fungi : 28
## (Other) :708 (Other) : 45
## NA's : 87 NA's : 87
## Phylum_gtdb Class_gtdb Order_gtdb
## Pseudomonadota :858 Gammaproteobacteria:434 Rhodobacterales : 159
## Bacteroidota :225 Alphaproteobacteria:400 Flavobacteriales: 152
## Actinomycetota :201 Bacteroidia :224 Burkholderiales : 134
## Verrucomicrobiota: 88 Actinomycetes :181 Pseudomonadales : 120
## Planctomycetota : 81 Verrucomicrobiae : 86 Enterobacterales: 114
## (Other) :401 (Other) :513 (Other) :1094
## NA's :294 NA's :310 NA's : 375
## Family_gtdb Genus_gtdb
## Rhodobacteraceae : 162 Alteromonas : 65
## Flavobacteriaceae: 108 Streptomyces : 53
## Alteromonadaceae : 92 Planktomarina : 37
## Burkholderiaceae : 63 GCA-2721545 : 24
## Streptomycetaceae: 53 Nitrosopelagicus: 24
## (Other) :1280 (Other) :1539
## NA's : 390 NA's : 406
## Species_gtdb
## Alteromonas alba : 47
## GCA-2721545 sp905612245 : 23
## Psychrobacter sp000511655: 20
## Planktomarina sp028822275: 16
## Sulfuritalea sp022843505 : 16
## (Other) :1558
## NA's : 468
# Add functional information, pre-defined as the mode of all annotations within the AGC (not taking only into account nickel-related genes/contigs)
AGC_function <- fread("/Users/emifaure/Documents/ACE/NolwennCollab/Post_Review/Annotations_AGC_Metallo_AGCLevel.tsv", data.table=F)
names(AGC_function)=c("AGC_ID","AGC_Rep","AGC_Size","AGC_Cat","Singl_Cat","KEGG","seed_ortholog","eggNOG_OGs","narr_OG_name","narr_OG_cat","narr_OG_desc","best_OG_name","best_OG_cat","best_OG_desc","Preferred_name","CAZy","BiGG_Reaction","PFAMs")
AGC_Taxo_Annot <- merge(AGC_function, AGC_Taxo_Annot, by="AGC_ID")
AGC_Taxo_Annot<-as.data.frame(AGC_Taxo_Annot)
# Multivariate analysis
#. I- FL
# We'll try to compute an RDA to determine how much of cluster abundance is driven by Nickel
meta_nickel_FL_rda = select(meta_nickel_FL,c("ACE_seq_name","Ni_60_58_D_DELTA_BOTTLE","Ni_D_CONC_BOTTLE"))
row.names(meta_nickel_FL_rda)=meta_nickel_FL_rda$ACE_seq_name
meta_nickel_FL_rda=select(meta_nickel_FL_rda,-c("ACE_seq_name"))
meta_nickel_FL_rda=data.frame(scale(meta_nickel_FL_rda))
# Get rid of the AGC present in only 1 sample
GeneMat_Nickel_FL <- GeneMat_Nickel_FL[,-which(colSums(GeneMat_Nickel_FL>0)<2)]
# IF USING non transformed data, need to transform the abundances to deal with compositionality
#GeneMat_Nickel_FL <- decostand(GeneMat_Nickel_FL, method="rclr")
# Match the row order
GeneMat_Nickel_FL_rda=GeneMat_Nickel_FL[match(row.names(meta_nickel_FL_rda),row.names(GeneMat_Nickel_FL)),]
RDA=rda(GeneMat_Nickel_FL_rda ~ ., data = meta_nickel_FL_rda)
#summary(RDA)
RsquareAdj(RDA) #16.9%
## $r.squared
## [1] 0.2261923
##
## $adj.r.squared
## [1] 0.1688732
anova.cca(RDA) #Significant
## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: rda(formula = GeneMat_Nickel_FL_rda ~ Ni_60_58_D_DELTA_BOTTLE + Ni_D_CONC_BOTTLE, data = meta_nickel_FL_rda)
## Df Variance F Pr(>F)
## Model 2 85.907 3.9462 0.001 ***
## Residual 27 293.889
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova.cca(RDA, by="axis") # Both significant
## Permutation test for rda under reduced model
## Forward tests for axes
## Permutation: free
## Number of permutations: 999
##
## Model: rda(formula = GeneMat_Nickel_FL_rda ~ Ni_60_58_D_DELTA_BOTTLE + Ni_D_CONC_BOTTLE, data = meta_nickel_FL_rda)
## Df Variance F Pr(>F)
## RDA1 1 51.492 4.7307 0.002 **
## RDA2 1 34.415 3.1617 0.005 **
## Residual 27 293.889
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
scores=scoresRDA(RDA)
##
## -----------------------------------------------------------------------
## Site constraints (lc) selected. To obtain site scores that are weighted
## sums of species scores (default in vegan), argument site.sc must be set
## to wa.
## -----------------------------------------------------------------------
samples = scores$Sites
samples = as.data.frame(samples)
samples = merge(samples,meta_nickel_FL, by.x="row.names", by.y="ACE_seq_name")
row.names(samples)=samples[,1]
samples=samples[,-1]
# Retrieve AGC scores
AGCScore <- scores$Species
AGCScore = as.data.frame(AGCScore)
# Keep only scores away from center
AGCScore_select = AGCScore[which(AGCScore$RDA1>0.75 | AGCScore$RDA1<(-0.75) | AGCScore$RDA2>0.75 | AGCScore$RDA2<(-0.75)),]
enviscore=scores$Biplot
#Plot the triplot
ggplot() + geom_hline(yintercept = 0, linetype='dotted') +
geom_vline(xintercept = 0, linetype='dotted') +
labs(x = paste0("RDA1 (",
round(100 * scores$Eigenval.rel[1], 1), "%)"),
y = paste0("RDA2 (",
round(100 * scores$Eigenval.rel[2], 1), "%)")) +
theme(plot.title=element_text(hjust=0.5)) +
geom_segment(data=as.data.frame(AGCScore),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.5,color = 'indianred2',arrow = arrow(length = unit(0.2,"cm")), alpha=0.25) +
geom_point(aes(samples[,1], samples[,2], col=samples$Ni_60_58_D_DELTA_BOTTLE), size = 5, alpha = 0.9) +
geom_text(aes(samples[,1], samples[,2], label=samples$TM_station_number), col="white", size=3) +
geom_segment(data=as.data.frame(enviscore),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.5, linetype="F1",color = 'orange',arrow = arrow(length = unit(0.2,"cm"))) +
geom_text(aes(enviscore[,1]*1.05, enviscore[,2]*1.05, label=rownames(enviscore)), color="orange") +
geom_text(aes(AGCScore_select[,1]*1.05, AGCScore_select[,2]*1.05, label=rownames(AGCScore_select)), color="indianred2") +
labs(col = expression(delta^60 * Ni ~ "(‰)")) +
geom_segment(data=as.data.frame(AGCScore_select),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.5,color = 'indianred2',arrow = arrow(length = unit(0.2,"cm"))) +
theme_bw()
# A pack of AGC is associated to station 11
MertzAGC <- AGCScore[which(AGCScore$RDA1>0.5 & AGCScore$RDA2>(-0.5) & AGCScore$RDA2<(0)),]
MertzAGC <- MertzAGC[order(MertzAGC$RDA2, decreasing = F),]
MertzAGCannot <- AGC_Taxo_Annot[which(AGC_Taxo_Annot$AGC_ID %in% row.names(MertzAGC)),]
MertzAGCannot <- MertzAGCannot[match(row.names(MertzAGC), MertzAGCannot$AGC_ID),]
MertzAGCannot
## AGC_ID AGC_Rep AGC_Size AGC_Cat Singl_Cat
## 517 AGC_17036882 ACE_G151_000000191440_20051104 149 K
## 327 AGC_14355128 ACE_G170_000000261377_170136783 444 K
## 781 AGC_20916161 ACE_G22_000000108730_163065181 279 K
## 593 AGC_1819471 ACE_G23_000000163918_128019073 120 KWP
## 925 AGC_22886549 ACE_G105_000000212698_61593594 101 K
## 1469 AGC_3062244 ACE_G110_000000183616_158277234 12 GU
## 1680 AGC_5999524 ACE_G151_000000097443_46338114 32 K
## 25 AGC_10331396 ACE_G103_000000165822_96587828 22 KWP
## 679 AGC_19486454 ACE_G119_000000026685_44592785 70 K
## 1352 AGC_28603842 ACE_G196_000000063741_153664976 14 K
## 610 AGC_18521265 ACE_G158_000000041593_81730025 40 K
## 56 AGC_1069464 ACE_G109_000000000736_79295090 15 K
## 371 AGC_14931481 ACE_G27_000000050805_128184360 42 K
## KEGG seed_ortholog
## 517 1443665.JACA01000026_gene3529
## 327 1137799.GZ78_13105
## 781 491952.Mar181_2423
## 593
## 925 K04564 472759.Nhal_2272
## 1469 313595.P700755_001745
## 1680 K04564 867900.Celly_1246
## 25 313595.P700755_001745
## 679
## 1352 K04564 313594.PI23P_02707
## 610 313594.PI23P_07510
## 56 K03187 391626.OAN307_c42250
## 371 K04564 686578.AFFX01000004_gene3555
## eggNOG_OGs
## 517 COG1573@1|root,COG1573@2|Bacteria,4NG84@976|Bacteroidetes,1HXRH@117743|Flavobacteriia,2YHMM@290174|Aquimarina
## 327 293MU@1|root,2ZR3M@2|Bacteria,1REEY@1224|Proteobacteria,1S3UE@1236|Gammaproteobacteria,1XJID@135619|Oceanospirillales
## 781 293MU@1|root,2ZR3M@2|Bacteria,1REEY@1224|Proteobacteria,1S3UE@1236|Gammaproteobacteria,1XJID@135619|Oceanospirillales
## 593
## 925 COG0605@1|root,COG0605@2|Bacteria,1MVW2@1224|Proteobacteria,1RP7X@1236|Gammaproteobacteria,1WWRN@135613|Chromatiales
## 1469 COG2032@1|root,COG2032@2|Bacteria,4NGGW@976|Bacteroidetes,1HY8P@117743|Flavobacteriia,4C3V1@83612|Psychroflexus
## 1680 COG0605@1|root,COG0605@2|Bacteria,4NDZ4@976|Bacteroidetes,1HX6F@117743|Flavobacteriia
## 25 COG2032@1|root,COG2032@2|Bacteria,4NGGW@976|Bacteroidetes,1HY8P@117743|Flavobacteriia,4C3V1@83612|Psychroflexus
## 679 COG2032@1|root,COG2032@2|Bacteria,4NM88@976|Bacteroidetes,1I17T@117743|Flavobacteriia
## 1352 COG0605@1|root,COG0605@2|Bacteria,4NDZ4@976|Bacteroidetes,1HX6F@117743|Flavobacteriia,3VVG2@52959|Polaribacter
## 610 COG2032@1|root,COG2032@2|Bacteria,4NM88@976|Bacteroidetes,1I17T@117743|Flavobacteriia
## 56 COG2371@1|root,COG2371@2|Bacteria,1MZQZ@1224|Proteobacteria,2TVBW@28211|Alphaproteobacteria
## 371 COG0605@1|root,COG0605@2|Bacteria,1MVW2@1224|Proteobacteria,1RP7X@1236|Gammaproteobacteria
## narr_OG_name narr_OG_cat
## 517 2YHMM@290174|Aquimarina L
## 327 1XJID@135619|Oceanospirillales S
## 781 1XJID@135619|Oceanospirillales S
## 593 L
## 925 1WWRN@135613|Chromatiales P
## 1469 4C3V1@83612|Psychroflexus P
## 1680 1HX6F@117743|Flavobacteriia P
## 25 4C3V1@83612|Psychroflexus P
## 679 1I17T@117743|Flavobacteriia P
## 1352 3VVG2@52959|Polaribacter C
## 610 1I17T@117743|Flavobacteriia P
## 56 2TVBW@28211|Alphaproteobacteria O
## 371 1RP7X@1236|Gammaproteobacteria C
## narr_OG_desc
## 517 Uracil-DNA glycosylase
## 327 Superoxide dismutase
## 781 Superoxide dismutase
## 593 Excises uracil residues from the DNA which can arise as a result of misincorporation of dUMP residues by DNA polymerase or due to deamination of cytosine
## 925 Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 1469 superoxide dismutase activity
## 1680 Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 25 superoxide dismutase activity
## 679 Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 1352 Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 610 Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 56 Involved in urease metallocenter assembly. Binds nickel. Probably functions as a nickel donor during metallocenter assembly
## 371 Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## best_OG_name best_OG_cat
## 517 2TSWU@28211|Alphaproteobacteria L
## 327 1XJID@135619|Oceanospirillales S
## 781 1XJID@135619|Oceanospirillales S
## 593 4NE2B@976|Bacteroidetes L
## 925 1WWRN@135613|Chromatiales C
## 1469 4NGGW@976|Bacteroidetes P
## 1680 4NDZ4@976|Bacteroidetes P
## 25 4NGGW@976|Bacteroidetes P
## 679 4NM88@976|Bacteroidetes P
## 1352 4NDZ4@976|Bacteroidetes P
## 610 4NM88@976|Bacteroidetes P
## 56 2TVBW@28211|Alphaproteobacteria O
## 371 1RP7X@1236|Gammaproteobacteria C
## best_OG_desc
## 517 Uracil-DNA glycosylase
## 327 Superoxide dismutase
## 781 Superoxide dismutase
## 593 Excises uracil residues from the DNA which can arise as a result of misincorporation of dUMP residues by DNA polymerase or due to deamination of cytosine
## 925 Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 1469 Pfam Secreted repeat of
## 1680 Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 25 Pfam Secreted repeat of
## 679 Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 1352 Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 610 Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 56 Involved in urease metallocenter assembly. Binds nickel. Probably functions as a nickel donor during metallocenter assembly
## 371 Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## Preferred_name CAZy
## 517 - -
## 327 - -
## 781 - -
## 593 ung -
## 925 - -
## 1469 - -
## 1680 sodA -
## 25 - -
## 679 sodC -
## 1352 sodA -
## 610 sodC -
## 56 ureE -
## 371 sodB -
## BiGG_Reaction
## 517 -
## 327 -
## 781 -
## 593 -
## 925 -
## 1469 -
## 1680 -
## 25 -
## 679 -
## 1352 -
## 610 -
## 56 -
## 371 iAF1260.b1656,iAPECO1_1312.APECO1_736,iB21_1397.B21_01616,iBWG_1329.BWG_1471,iE2348C_1286.E2348C_1742,iEC042_1314.EC042_1825,iEC55989_1330.EC55989_1824,iECABU_c1320.ECABU_c19090,iECBD_1354.ECBD_1987,iECB_1328.ECB_01626,iECDH10B_1368.ECDH10B_1790,iECDH1ME8569_1439.ECDH1ME8569_1601,iECD_1391.ECD_01626,iECED1_1282.ECED1_1855,iECH74115_1262.ECH74115_2368,iECIAI1_1343.ECIAI1_1708,iECO103_1326.ECO103_1797,iECO111_1330.ECO111_2126,iECO26_1355.ECO26_2385,iECOK1_1307.ECOK1_1775,iECP_1309.ECP_1601,iECS88_1305.ECS88_1705,iECSE_1348.ECSE_1780,iECSP_1301.ECSP_2222,iECUMN_1333.ECUMN_1946,iECW_1372.ECW_m1823,iECs_1301.ECs2365,iEKO11_1354.EKO11_2118,iETEC_1333.ETEC_1691,iEcDH1_1363.EcDH1_1984,iEcE24377_1341.EcE24377A_1869,iEcHS_1320.EcHS_A1735,iEcSMS35_1347.EcSMS35_1542,iEcolC_1368.EcolC_1973,iG2583_1286.G2583_2051,iJO1366.b1656,iJR904.b1656,iLF82_1304.LF82_2148,iNRG857_1313.NRG857_08300,iSBO_1134.SBO_1475,iSDY_1059.SDY_1882,iSFV_1184.SFV_1678,iSF_1195.SF1684,iSFxv_1172.SFxv_1892,iSSON_1240.SSON_1500,iS_1188.S1816,iSbBS512_1146.SbBS512_E1853,iUMN146_1321.UM146_08870,iUMNK88_1353.UMNK88_2117,iUTI89_1310.UTI89_C1847,iWFL_1372.ECW_m1823,iY75_1357.Y75_RS08680,iZ_1308.Z2678,ic_1306.c2050
## PFAMs Species_uniref Genus_uniref
## 517 UDG uc_Nitrospinae uc_Nitrospinae
## 327 Sod_Ni uc_Flavobacteriaceae Polaribacter
## 781 Sod_Ni unknown unknown
## 593 UDG Flavobacteriaceae bacterium unknown
## 925 Sod_Fe_C,Sod_Fe_N Glaciecola nitratireducens unknown
## 1469 CHRD Polaribacter irgensii Psychroflexus
## 1680 Sod_Fe_C,Sod_Fe_N Flavobacteriales bacterium unknown
## 25 CHRD Psychroflexus gondwanensis Psychroflexus
## 679 Sod_Cu unidentified eubacterium SCB49 unknown
## 1352 Sod_Fe_C,Sod_Fe_N uc_Polaribacter Polaribacter
## 610 Sod_Cu Polaribacter sp. Polaribacter
## 56 UreE_C,UreE_N unknown unknown
## 371 Sod_Fe_C,Sod_Fe_N unknown unknown
## Family_uniref Order_uniref Class_uniref Phylum_uniref
## 517 uc_Nitrospinae Rhodobacterales Alphaproteobacteria Nitrospinae
## 327 Flavobacteriaceae Flavobacteriales Flavobacteriia Bacteroidetes
## 781 unknown unknown unknown unknown
## 593 Flavobacteriaceae Flavobacteriales Flavobacteriia Bacteroidetes
## 925 unknown Alteromonadales Gammaproteobacteria Proteobacteria
## 1469 Flavobacteriaceae Flavobacteriales Flavobacteriia Bacteroidetes
## 1680 unknown Flavobacteriales Flavobacteriia Bacteroidetes
## 25 Flavobacteriaceae Flavobacteriales Flavobacteriia Bacteroidetes
## 679 Flavobacteriaceae Flavobacteriales Flavobacteriia Bacteroidetes
## 1352 Flavobacteriaceae Flavobacteriales Flavobacteriia Bacteroidetes
## 610 Flavobacteriaceae Flavobacteriales Flavobacteriia Bacteroidetes
## 56 unknown unknown unknown unknown
## 371 unknown unknown unknown unknown
## Domain_uniref Domain_gtdb Phylum_gtdb Class_gtdb
## 517 unknown Bacteria Nitrospinota Nitrospinia
## 327 unknown Bacteria Bacteroidota Bacteroidia
## 781 unknown Bacteria Pseudomonadota Gammaproteobacteria
## 593 unknown Bacteria Bacteroidota Bacteroidia
## 925 unknown Bacteria Pseudomonadota Gammaproteobacteria
## 1469 unknown Bacteria Bacteroidota Bacteroidia
## 1680 unknown Bacteria Bacteroidota Bacteroidia
## 25 unknown Bacteria Bacteroidota Bacteroidia
## 679 unknown Bacteria Bacteroidota Bacteroidia
## 1352 unknown Bacteria Bacteroidota Bacteroidia
## 610 unknown Bacteria Bacteroidota Bacteroidia
## 56 unknown Bacteria Pseudomonadota Alphaproteobacteria
## 371 unknown Bacteria Pseudomonadota Gammaproteobacteria
## Order_gtdb Family_gtdb Genus_gtdb
## 517 Nitrospinales VA-1 UBA8687
## 327 Flavobacteriales Flavobacteriaceae Winogradskyella
## 781 Pseudomonadales Porticoccaceae HTCC2207
## 593 Flavobacteriales Flavobacteriaceae SGZJ01
## 925 GCA-001735895 GCA-001735895 GCA-001735895
## 1469 Flavobacteriales Flavobacteriaceae Polaribacter
## 1680 Flavobacteriales Flavobacteriaceae UWMA-0277
## 25 Flavobacteriales Flavobacteriaceae Polaribacter
## 679 Flavobacteriales Flavobacteriaceae GCA-002733185
## 1352 Flavobacteriales Flavobacteriaceae Polaribacter
## 610 Flavobacteriales Flavobacteriaceae Polaribacter
## 56 Rhodobacterales Rhodobacteraceae Octadecabacter
## 371 Pseudomonadales Pseudomonadaceae Halopseudomonas
## Species_gtdb
## 517 UBA8687 sp013349585
## 327 Winogradskyella sp002364335
## 781 HTCC2207 sp913031645
## 593 SGZJ01 sp028825365
## 925 GCA-001735895 sp913061315
## 1469 Polaribacter irgensii
## 1680 UWMA-0277 sp905612305
## 25 Nonlabens sp012974865
## 679 GCA-002733185 sp030591585
## 1352 Polaribacter sp913050305
## 610 Polaribacter sp003537695
## 56 Octadecabacter sp028825445
## 371 Halopseudomonas neustonica
MertzGenes <- Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID %in% row.names(MertzAGC)),]
#MertzGenes
# Check the taxo within those that are dominated by unknown
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_20916161"),]
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_1069464"),]
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_14931481"),]
# 3 others are correlated to dNi yet in between station 11 and 8
DNiAGC <- AGCScore[which(AGCScore$RDA1>0.5 & AGCScore$RDA2<(-0.5)),]
DNiAGC <- DNiAGC[order(DNiAGC$RDA1, decreasing = T),]
DNiAGCannot <- AGC_Taxo_Annot[which(AGC_Taxo_Annot$AGC_ID %in% row.names(DNiAGC)),]
DNiAGCannot[match(row.names(DNiAGC), DNiAGCannot$AGC_ID),]
## AGC_ID AGC_Rep AGC_Size AGC_Cat Singl_Cat
## 354 AGC_14687886 ACE_G104_000000029829_114121823 154 K
## 342 AGC_14548656 ACE_G143_000000003166_98493499 94 K
## 939 AGC_23057241 ACE_G125_000000206004_18635940 72 K
## KEGG seed_ortholog
## 354 K03187 1342299.Z947_2243
## 342 K03188 391616.OA238_c03380
## 939 K03190 666509.RCA23_c08820
## eggNOG_OGs
## 354 COG2371@1|root,COG2371@2|Bacteria,1MZQZ@1224|Proteobacteria,2TVBW@28211|Alphaproteobacteria,3ZXMP@60136|Sulfitobacter
## 342 COG0830@1|root,COG0830@2|Bacteria,1MW8Q@1224|Proteobacteria,2TRVW@28211|Alphaproteobacteria
## 939 COG0829@1|root,COG0829@2|Bacteria,1RABD@1224|Proteobacteria,2U57R@28211|Alphaproteobacteria
## narr_OG_name narr_OG_cat
## 354 3ZXMP@60136|Sulfitobacter O
## 342 2TRVW@28211|Alphaproteobacteria O
## 939 2U57R@28211|Alphaproteobacteria O
## narr_OG_desc
## 354 Involved in urease metallocenter assembly. Binds nickel. Probably functions as a nickel donor during metallocenter assembly
## 342 Required for maturation of urease via the functional incorporation of the urease nickel metallocenter
## 939 Required for maturation of urease via the functional incorporation of the urease nickel metallocenter
## best_OG_name best_OG_cat
## 354 2TVBW@28211|Alphaproteobacteria O
## 342 2TRVW@28211|Alphaproteobacteria O
## 939 2U57R@28211|Alphaproteobacteria O
## best_OG_desc
## 354 Involved in urease metallocenter assembly. Binds nickel. Probably functions as a nickel donor during metallocenter assembly
## 342 Required for maturation of urease via the functional incorporation of the urease nickel metallocenter
## 939 Required for maturation of urease via the functional incorporation of the urease nickel metallocenter
## Preferred_name CAZy BiGG_Reaction PFAMs Species_uniref
## 354 ureE - - UreE_C,UreE_N unknown
## 342 ureF - - UreF unknown
## 939 ureD - - UreD Rhodobacteraceae bacterium
## Genus_uniref Family_uniref Order_uniref Class_uniref
## 354 unknown unknown unknown unknown
## 342 unknown unknown unknown unknown
## 939 unknown Rhodobacteraceae Rhodobacterales Alphaproteobacteria
## Phylum_uniref Domain_uniref Domain_gtdb Phylum_gtdb
## 354 unknown unknown Bacteria Pseudomonadota
## 342 Proteobacteria unknown Bacteria Pseudomonadota
## 939 Proteobacteria unknown Bacteria Pseudomonadota
## Class_gtdb Order_gtdb Family_gtdb Genus_gtdb
## 354 Alphaproteobacteria Rhodobacterales Rhodobacteraceae Yoonia
## 342 Alphaproteobacteria Rhodobacterales Rhodobacteraceae Octadecabacter
## 939 Alphaproteobacteria Rhodobacterales Rhodobacteraceae Ascidiaceihabitans
## Species_gtdb
## 354 Yoonia sp023955335
## 342 Octadecabacter sp028825445
## 939 Ascidiaceihabitans sp905182355
DNiAGCannot
## AGC_ID AGC_Rep AGC_Size AGC_Cat Singl_Cat
## 342 AGC_14548656 ACE_G143_000000003166_98493499 94 K
## 354 AGC_14687886 ACE_G104_000000029829_114121823 154 K
## 939 AGC_23057241 ACE_G125_000000206004_18635940 72 K
## KEGG seed_ortholog
## 342 K03188 391616.OA238_c03380
## 354 K03187 1342299.Z947_2243
## 939 K03190 666509.RCA23_c08820
## eggNOG_OGs
## 342 COG0830@1|root,COG0830@2|Bacteria,1MW8Q@1224|Proteobacteria,2TRVW@28211|Alphaproteobacteria
## 354 COG2371@1|root,COG2371@2|Bacteria,1MZQZ@1224|Proteobacteria,2TVBW@28211|Alphaproteobacteria,3ZXMP@60136|Sulfitobacter
## 939 COG0829@1|root,COG0829@2|Bacteria,1RABD@1224|Proteobacteria,2U57R@28211|Alphaproteobacteria
## narr_OG_name narr_OG_cat
## 342 2TRVW@28211|Alphaproteobacteria O
## 354 3ZXMP@60136|Sulfitobacter O
## 939 2U57R@28211|Alphaproteobacteria O
## narr_OG_desc
## 342 Required for maturation of urease via the functional incorporation of the urease nickel metallocenter
## 354 Involved in urease metallocenter assembly. Binds nickel. Probably functions as a nickel donor during metallocenter assembly
## 939 Required for maturation of urease via the functional incorporation of the urease nickel metallocenter
## best_OG_name best_OG_cat
## 342 2TRVW@28211|Alphaproteobacteria O
## 354 2TVBW@28211|Alphaproteobacteria O
## 939 2U57R@28211|Alphaproteobacteria O
## best_OG_desc
## 342 Required for maturation of urease via the functional incorporation of the urease nickel metallocenter
## 354 Involved in urease metallocenter assembly. Binds nickel. Probably functions as a nickel donor during metallocenter assembly
## 939 Required for maturation of urease via the functional incorporation of the urease nickel metallocenter
## Preferred_name CAZy BiGG_Reaction PFAMs Species_uniref
## 342 ureF - - UreF unknown
## 354 ureE - - UreE_C,UreE_N unknown
## 939 ureD - - UreD Rhodobacteraceae bacterium
## Genus_uniref Family_uniref Order_uniref Class_uniref
## 342 unknown unknown unknown unknown
## 354 unknown unknown unknown unknown
## 939 unknown Rhodobacteraceae Rhodobacterales Alphaproteobacteria
## Phylum_uniref Domain_uniref Domain_gtdb Phylum_gtdb
## 342 Proteobacteria unknown Bacteria Pseudomonadota
## 354 unknown unknown Bacteria Pseudomonadota
## 939 Proteobacteria unknown Bacteria Pseudomonadota
## Class_gtdb Order_gtdb Family_gtdb Genus_gtdb
## 342 Alphaproteobacteria Rhodobacterales Rhodobacteraceae Octadecabacter
## 354 Alphaproteobacteria Rhodobacterales Rhodobacteraceae Yoonia
## 939 Alphaproteobacteria Rhodobacterales Rhodobacteraceae Ascidiaceihabitans
## Species_gtdb
## 342 Octadecabacter sp028825445
## 354 Yoonia sp023955335
## 939 Ascidiaceihabitans sp905182355
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_14687886"),]
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_14548656"),]
# 24 are associated to station 8
ST8AGC <- AGCScore[which((AGCScore$RDA1<(0.5) & AGCScore$RDA1>(0) & AGCScore$RDA2<(-0.5)) | (AGCScore$RDA1<(0) & AGCScore$RDA1>(-0.5) & AGCScore$RDA2<(-0.75))),]
ST8AGC <- ST8AGC[order(ST8AGC$RDA1, decreasing = T),]
ST8AGCannot <- AGC_Taxo_Annot[which(AGC_Taxo_Annot$AGC_ID %in% row.names(ST8AGC)),]
ST8AGCannot <- ST8AGCannot[match(row.names(ST8AGC), ST8AGCannot$AGC_ID),]
ST8AGCannot
## AGC_ID AGC_Rep AGC_Size AGC_Cat Singl_Cat
## 964 AGC_23334844 ACE_G66_000000022940_50631213 54 K
## 1804 AGC_7792447 ACE_G36_000000069010_49679286 2463 K
## 362 AGC_14840044 ACE_G92_000000082546_139792161 852 K
## 298 AGC_13989203 ACE_G123_000000257491_88665121 275 K
## 88 AGC_11093182 ACE_G80_000000112913_174111159 20 K
## 836 AGC_2164110 ACE_G157_000000108833_151819534 169 K
## 544 AGC_17472745 ACE_G15_000000053663_108120463 165 K
## 618 AGC_18601609 ACE_G173_000000057257_152747621 31 K
## 1081 AGC_24807388 ACE_G188_000000079517_144668771 114 K
## 588 AGC_18108257 ACE_G159_000000214533_108087016 29 K
## 1837 AGC_8142656 ACE_G174_000000515234_74005539 22 K
## 1169 AGC_25996182 ACE_G194_000000189051_74736089 57 K
## 1183 AGC_26196954 ACE_G48_000000219824_6215485 31 K
## 1419 AGC_29397748 ACE_G172_000000569973_65083237 240 K
## 1542 AGC_4137832 ACE_G173_000000327145_47585756 45 GU
## 1375 AGC_28890756 ACE_G93_000000207684_131098583 25 GU
## 1504 AGC_3526244 ACE_G86_000000312768_16686896 21 GU
## 730 AGC_20290509 ACE_G86_000000265023_148188009 18 GU
## 1290 AGC_27773751 ACE_G89_000000251289_113307982 20 GU
## 1633 AGC_5500562 ACE_G159_000000163877_143139768 40 GU
## 650 AGC_19008763 ACE_G175_000000374355_3929490 57 KWP
## 822 AGC_21493453 ACE_G28_000000006429_101950529 71 KWP
## 100 AGC_11214517 ACE_G92_000000395364_122293427 38 GU
## 250 AGC_13479537 ACE_G88_000000238137_165809251 67 K
## KEGG seed_ortholog
## 964 1342301.JASD01000008_gene2370
## 1804 K04564 666509.RCA23_c18480
## 362 44056.XP_009034551.1
## 298 K04564 666509.RCA23_c18480
## 88 K04565 41875.XP_007508895.1
## 836 1298865.H978DRAFT_2911
## 544 313596.RB2501_04370
## 618 K04564 296587.XP_002507909.1
## 1081 44056.XP_009039471.1
## 588 41875.XP_007509011.1
## 1837 K03187 313625.BL107_06974
## 1169 K03190 41875.XP_007515132.1
## 1183 2903.EOD39616
## 1419 243090.RB12634
## 1542 313594.PI23P_11007
## 1375 344747.PM8797T_02424
## 1504 313594.PI23P_11007
## 730 344747.PM8797T_02424
## 1290 313595.P700755_001745
## 1633 886377.Murru_2609
## 650 439493.PB7211_332
## 822 1265756.AWZW01000008_gene1247
## 100 313594.PI23P_11007
## 250 1131266.ARWQ01000023_gene385
## eggNOG_OGs
## 964 COG0378@1|root,COG0378@2|Bacteria,1MVBD@1224|Proteobacteria,2TQMU@28211|Alphaproteobacteria,3ZWJ9@60136|Sulfitobacter
## 1804 COG0605@1|root,COG0605@2|Bacteria,1MVW2@1224|Proteobacteria,2TU3T@28211|Alphaproteobacteria
## 362 COG5272@1|root,KOG0001@2759|Eukaryota
## 298 COG0605@1|root,COG0605@2|Bacteria,1MVW2@1224|Proteobacteria,2TU3T@28211|Alphaproteobacteria
## 88 COG2032@1|root,KOG0441@2759|Eukaryota,37Q1U@33090|Viridiplantae
## 836 293MU@1|root,2ZR3M@2|Bacteria,1REEY@1224|Proteobacteria,1S3UE@1236|Gammaproteobacteria,46B1B@72275|Alteromonadaceae
## 544 COG0374@1|root,COG0374@2|Bacteria,4NJIP@976|Bacteroidetes,1I3BR@117743|Flavobacteriia
## 618 COG0605@1|root,KOG0876@2759|Eukaryota,37MWT@33090|Viridiplantae,34HM7@3041|Chlorophyta
## 1081 COG0804@1|root,2QPKB@2759|Eukaryota
## 588 COG2608@1|root,KOG4656@2759|Eukaryota,37QGH@33090|Viridiplantae
## 1837 COG2371@1|root,COG2371@2|Bacteria,1G6TF@1117|Cyanobacteria,1H0TA@1129|Synechococcus
## 1169 COG0138@1|root,2QSQN@2759|Eukaryota,37QE5@33090|Viridiplantae
## 1183 2A42S@1|root,2S3XN@2759|Eukaryota
## 1419 2A42S@1|root,30SMP@2|Bacteria
## 1542 COG2032@1|root,COG2032@2|Bacteria,4NGGW@976|Bacteroidetes,1HY8P@117743|Flavobacteriia,3VWSB@52959|Polaribacter
## 1375 COG1404@1|root,COG2931@1|root,COG3209@1|root,COG3210@1|root,COG4932@1|root,COG5276@1|root,COG1404@2|Bacteria,COG2931@2|Bacteria,COG3209@2|Bacteria,COG3210@2|Bacteria,COG4932@2|Bacteria,COG5276@2|Bacteria
## 1504 COG2032@1|root,COG2032@2|Bacteria,4NGGW@976|Bacteroidetes,1HY8P@117743|Flavobacteriia,3VWSB@52959|Polaribacter
## 730 COG1404@1|root,COG2931@1|root,COG3209@1|root,COG3210@1|root,COG4932@1|root,COG5276@1|root,COG1404@2|Bacteria,COG2931@2|Bacteria,COG3209@2|Bacteria,COG3210@2|Bacteria,COG4932@2|Bacteria,COG5276@2|Bacteria
## 1290 COG2032@1|root,COG2032@2|Bacteria,4NGGW@976|Bacteroidetes,1HY8P@117743|Flavobacteriia,4C3V1@83612|Psychroflexus
## 1633 COG4315@1|root,COG4315@2|Bacteria,4NI3N@976|Bacteroidetes,1HYWK@117743|Flavobacteriia
## 650 COG1573@1|root,COG1573@2|Bacteria,1MW8T@1224|Proteobacteria,2TSAR@28211|Alphaproteobacteria,4BT8H@82117|unclassified Alphaproteobacteria
## 822 COG1573@1|root,COG1573@2|Bacteria,1MW91@1224|Proteobacteria,2TSUP@28211|Alphaproteobacteria,4BQ5P@82117|unclassified Alphaproteobacteria
## 100 COG2032@1|root,COG2032@2|Bacteria,4NGGW@976|Bacteroidetes,1HY8P@117743|Flavobacteriia,3VWSB@52959|Polaribacter
## 250 COG0831@1|root,arCOG04528@2157|Archaea,41SPI@651137|Thaumarchaeota
## narr_OG_name
## 964 3ZWJ9@60136|Sulfitobacter
## 1804 2TU3T@28211|Alphaproteobacteria
## 362 KOG0001@2759|Eukaryota
## 298 2TU3T@28211|Alphaproteobacteria
## 88 37Q1U@33090|Viridiplantae
## 836 46B1B@72275|Alteromonadaceae
## 544 1I3BR@117743|Flavobacteriia
## 618 34HM7@3041|Chlorophyta
## 1081 2QPKB@2759|Eukaryota
## 588 37QGH@33090|Viridiplantae
## 1837 1H0TA@1129|Synechococcus
## 1169 37QE5@33090|Viridiplantae
## 1183 2S3XN@2759|Eukaryota
## 1419 30SMP@2|Bacteria
## 1542 3VWSB@52959|Polaribacter
## 1375 COG1404@2|Bacteria / COG2931@2|Bacteria / COG3209@2|Bacteria / COG3210@2|Bacteria / COG4932@2|Bacteria / COG5276@2|Bacteria
## 1504 3VWSB@52959|Polaribacter
## 730 COG1404@2|Bacteria / COG2931@2|Bacteria / COG3209@2|Bacteria / COG3210@2|Bacteria / COG4932@2|Bacteria / COG5276@2|Bacteria
## 1290 4C3V1@83612|Psychroflexus
## 1633 1HYWK@117743|Flavobacteriia
## 650 4BT8H@82117|unclassified Alphaproteobacteria
## 822 4BQ5P@82117|unclassified Alphaproteobacteria
## 100 3VWSB@52959|Polaribacter
## 250 41SPI@651137|Thaumarchaeota
## narr_OG_cat
## 964 KO
## 1804 P
## 362 O
## 298 C
## 88 P
## 836 S
## 544 C
## 618 P
## 1081 E
## 588 P
## 1837 O
## 1169 F
## 1183 S
## 1419 S
## 1542 P
## 1375 O / Q / M / U / M / -
## 1504 P
## 730 O / Q / M / U / M / -
## 1290 P
## 1633 S
## 650 L
## 822 L
## 100 P
## 250 E
## narr_OG_desc
## 964 Facilitates the functional incorporation of the urease nickel metallocenter. This process requires GTP hydrolysis, probably effectuated by UreG
## 1804 Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 362 cellular macromolecule catabolic process
## 298 Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 88 Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 836 Nickel-containing superoxide dismutase
## 544 hydrogenase large subunit
## 618 Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 1081 urease activity
## 588 Copper chaperone for superoxide dismutase
## 1837 Involved in urease metallocenter assembly. Binds nickel. Probably functions as a nickel donor during metallocenter assembly
## 1169 UreD urease accessory protein
## 1183 Nickel-containing superoxide dismutase
## 1419 Nickel-containing superoxide dismutase
## 1542 superoxide dismutase activity
## 1375 Belongs to the peptidase S8 family / calcium- and calmodulin-responsive adenylate cyclase activity / self proteolysis / domain, Protein / domain protein / -
## 1504 superoxide dismutase activity
## 730 Belongs to the peptidase S8 family / calcium- and calmodulin-responsive adenylate cyclase activity / self proteolysis / domain, Protein / domain protein / -
## 1290 superoxide dismutase activity
## 1633 Secreted repeat of unknown function
## 650 Uracil DNA glycosylase superfamily
## 822 Uracil DNA glycosylase superfamily
## 100 superoxide dismutase activity
## 250 Belongs to the urease gamma subunit family
## best_OG_name
## 964 2TQMU@28211|Alphaproteobacteria
## 1804 2TU3T@28211|Alphaproteobacteria
## 362 KOG0001@2759|Eukaryota
## 298 2TU3T@28211|Alphaproteobacteria
## 88 37Q1U@33090|Viridiplantae
## 836 1S3UE@1236|Gammaproteobacteria
## 544 4NJIP@976|Bacteroidetes
## 618 34HM7@3041|Chlorophyta
## 1081 2QPKB@2759|Eukaryota
## 588 37QGH@33090|Viridiplantae
## 1837 1G6TF@1117|Cyanobacteria
## 1169 37QE5@33090|Viridiplantae
## 1183 2S3XN@2759|Eukaryota
## 1419 30SMP@2|Bacteria
## 1542 4NGGW@976|Bacteroidetes
## 1375 COG1404@2|Bacteria / COG2931@2|Bacteria / COG3209@2|Bacteria / COG3210@2|Bacteria / COG4932@2|Bacteria / COG5276@2|Bacteria
## 1504 4NGGW@976|Bacteroidetes
## 730 COG1404@2|Bacteria / COG2931@2|Bacteria / COG3209@2|Bacteria / COG3210@2|Bacteria / COG4932@2|Bacteria / COG5276@2|Bacteria
## 1290 4NGGW@976|Bacteroidetes
## 1633 4NI3N@976|Bacteroidetes
## 650 2TSAR@28211|Alphaproteobacteria
## 822 2TSUP@28211|Alphaproteobacteria
## 100 4NGGW@976|Bacteroidetes
## 250 41SPI@651137|Thaumarchaeota
## best_OG_cat
## 964 KO
## 1804 P
## 362 O
## 298 P
## 88 P
## 836 S
## 544 C
## 618 P
## 1081 E
## 588 P
## 1837 O
## 1169 F
## 1183 S
## 1419 S
## 1542 P
## 1375 O / Q / M / U / M / -
## 1504 P
## 730 O / Q / M / U / M / -
## 1290 P
## 1633 M
## 650 L
## 822 L
## 100 P
## 250 E
## best_OG_desc
## 964 Facilitates the functional incorporation of the urease nickel metallocenter. This process requires GTP hydrolysis, probably effectuated by UreG
## 1804 Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 362 cellular macromolecule catabolic process
## 298 Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 88 Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 836 Superoxide dismutase
## 544 PFAM Nickel-dependent hydrogenase, large subunit
## 618 Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 1081 urease activity
## 588 Copper chaperone for superoxide dismutase
## 1837 Involved in urease metallocenter assembly. Binds nickel. Probably functions as a nickel donor during metallocenter assembly
## 1169 urease accessory protein
## 1183 Nickel-containing superoxide dismutase
## 1419 Nickel-containing superoxide dismutase
## 1542 Pfam Secreted repeat of
## 1375 Belongs to the peptidase S8 family / calcium- and calmodulin-responsive adenylate cyclase activity / self proteolysis / domain, Protein / domain protein / -
## 1504 Pfam Secreted repeat of
## 730 Belongs to the peptidase S8 family / calcium- and calmodulin-responsive adenylate cyclase activity / self proteolysis / domain, Protein / domain protein / -
## 1290 Pfam Secreted repeat of
## 1633 PFAM Secreted repeat of
## 650 uracil-DNA glycosylase
## 822 uracil-DNA glycosylase
## 100 Pfam Secreted repeat of
## 250 Belongs to the urease gamma subunit family
## Preferred_name CAZy BiGG_Reaction
## 964 ureG - -
## 1804 sodB - -
## 362 - - -
## 298 sodB - -
## 88 SODCP - -
## 836 - - -
## 544 - - -
## 618 MSD - -
## 1081 URE1 - -
## 588 CCS - -
## 1837 ureE - -
## 1169 - - -
## 1183 - - -
## 1419 - - -
## 1542 - - -
## 1375 - GH5,GH9 -
## 1504 - - -
## 730 - GH5,GH9 -
## 1290 - - -
## 1633 - - -
## 650 MA20_15960 - -
## 822 ung - -
## 100 - - -
## 250 ureA - -
## PFAMs
## 964 cobW
## 1804 Sod_Fe_C,Sod_Fe_N
## 362 ubiquitin
## 298 Sod_Fe_C,Sod_Fe_N
## 88 Sod_Cu
## 836 Sod_Ni
## 544 NiFeSe_Hases
## 618 Sod_Fe_C,Sod_Fe_N
## 1081 Amidohydro_1,DUF599,Fungal_trans,HMG_box,Urease_alpha,Urease_beta,Urease_gamma
## 588 HMA,Sod_Cu
## 1837 UreE_C,UreE_N
## 1169 UreD,UreF
## 1183 Sod_Ni
## 1419 Sod_Ni
## 1542 CHRD
## 1375 5_nucleotid_C,ASH,Autotransporter,Beta_helix,Big_2,Big_3_2,Big_3_3,Big_3_5,Big_5,CARDB,CBM60,CBM_6,CCP_MauG,CHRD,CHU_C,Cadherin,Cadherin-like,Cadherin_3,Calx-beta,CarboxypepD_reg,Cna_B,Cohesin,Collagen_bind,Collar,DUF11,DUF1573,DUF1929,DUF285,DUF3616,DUF4214,DUF4347,DUF4394,DUF5011,DUF5122,DUF637,DUF839,Exo_endo_phos,FG-GAP,FG-GAP_2,FIVAR,FctA,Fil_haemagg,FlgD_ig,Flg_new,Fn3_assoc,GDPD,GSDH,Gly_rich,Glyco_hydro_9,Glyoxal_oxid_N,Gram_pos_anchor,HYR,Haemagg_act,He_PIG,HemolysinCabind,Kelch_1,Kelch_4,LRR_4,LRR_adjacent,LTD,LVIVD,Lactonase,Laminin_G_3,Lectin_leg-like,Lipase,Lipase_GDSL,Lipase_GDSL_2,M60-like_N,Metallophos,MucBP,Mucin_bdg,N_methyl,PA14,PKD,PPC,P_proprotein,Paired_CXXCH_1,Peptidase_C2,Peptidase_M10_C,Peptidase_M14,Peptidase_M60,Peptidase_M64,Peptidase_M8,Peptidase_S8,Peptidase_S9,Pesticin,Pput2613-deam,Pur_ac_phosph_N,Pyocin_S,RGM_C,RHS_repeat,Reprolysin_3,Reprolysin_5,SASA,SBBP,SLH,SdiA-regulated,SdrD_B,SprB,SpvB,TSP_C,TrbI,Trypsin,VCBS,VPEP,VWA,VWD,WxL,fn3
## 1504 CHRD
## 730 5_nucleotid_C,ASH,Autotransporter,Beta_helix,Big_2,Big_3_2,Big_3_3,Big_3_5,Big_5,CARDB,CBM60,CBM_6,CCP_MauG,CHRD,CHU_C,Cadherin,Cadherin-like,Cadherin_3,Calx-beta,CarboxypepD_reg,Cna_B,Cohesin,Collagen_bind,Collar,DUF11,DUF1573,DUF1929,DUF285,DUF3616,DUF4214,DUF4347,DUF4394,DUF5011,DUF5122,DUF637,DUF839,Exo_endo_phos,FG-GAP,FG-GAP_2,FIVAR,FctA,Fil_haemagg,FlgD_ig,Flg_new,Fn3_assoc,GDPD,GSDH,Gly_rich,Glyco_hydro_9,Glyoxal_oxid_N,Gram_pos_anchor,HYR,Haemagg_act,He_PIG,HemolysinCabind,Kelch_1,Kelch_4,LRR_4,LRR_adjacent,LTD,LVIVD,Lactonase,Laminin_G_3,Lectin_leg-like,Lipase,Lipase_GDSL,Lipase_GDSL_2,M60-like_N,Metallophos,MucBP,Mucin_bdg,N_methyl,PA14,PKD,PPC,P_proprotein,Paired_CXXCH_1,Peptidase_C2,Peptidase_M10_C,Peptidase_M14,Peptidase_M60,Peptidase_M64,Peptidase_M8,Peptidase_S8,Peptidase_S9,Pesticin,Pput2613-deam,Pur_ac_phosph_N,Pyocin_S,RGM_C,RHS_repeat,Reprolysin_3,Reprolysin_5,SASA,SBBP,SLH,SdiA-regulated,SdrD_B,SprB,SpvB,TSP_C,TrbI,Trypsin,VCBS,VPEP,VWA,VWD,WxL,fn3
## 1290 CHRD
## 1633 Fasciclin,Lipoprotein_15
## 650 DUF4130,UDG
## 822 UDG
## 100 CHRD
## 250 Urease_beta,Urease_gamma
## Species_uniref Genus_uniref Family_uniref
## 964 uc_Rhodobacterales unknown unknown
## 1804 unknown unknown unknown
## 362 uc_Bacillariophyta uc_Bacillariophyta uc_Bacillariophyta
## 298 Planktomarina sp. unknown Rhodobacteraceae
## 88 Bathycoccus prasinos Bathycoccus Bathycoccaceae
## 836 unknown unknown unknown
## 544 uc_Bacteria uc_Bacteria uc_Bacteria
## 618 Prasinoderma singulare Prasinoderma unknown
## 1081 uc_Eukaryota uc_Eukaryota uc_Eukaryota
## 588 Bathycoccus prasinos Bathycoccus Bathycoccaceae
## 1837 unknown Synechococcus Synechococcaceae
## 1169 uc_Eukaryota uc_Eukaryota uc_Eukaryota
## 1183 Phaeocystis antarctica unknown Phaeocystaceae
## 1419 Opitutae bacterium unknown unknown
## 1542 Planctomycetaceae bacterium unknown Planctomycetaceae
## 1375 Planctomycetaceae bacterium unknown Planctomycetaceae
## 1504 Planctomycetaceae bacterium unknown Planctomycetaceae
## 730 Planctomycetaceae bacterium TMED138 unknown Planctomycetaceae
## 1290 Planctomycetaceae bacterium unknown Planctomycetaceae
## 1633 Rhodobiaceae bacterium unknown Rhodobiaceae
## 650 Rhodobiaceae bacterium unknown Rhodobiaceae
## 822 uc_Archaea uc_Archaea uc_Archaea
## 100 Planctomycetaceae bacterium unknown Planctomycetaceae
## 250 unknown unknown unknown
## Order_uniref Class_uniref Phylum_uniref Domain_uniref
## 964 Rhodobacterales Alphaproteobacteria Proteobacteria unknown
## 1804 unknown Alphaproteobacteria Proteobacteria unknown
## 362 uc_Bacillariophyta uc_Bacillariophyta Bacillariophyta unknown
## 298 Rhodobacterales Alphaproteobacteria Proteobacteria unknown
## 88 Mamiellales Mamiellophyceae Chlorophyta Viridiplantae
## 836 unknown Gammaproteobacteria Proteobacteria unknown
## 544 uc_Bacteria uc_Bacteria uc_Bacteria uc_Bacteria
## 618 Pelagomonadales Pelagophyceae unknown unknown
## 1081 uc_Eukaryota uc_Eukaryota uc_Eukaryota uc_Eukaryota
## 588 Mamiellales Mamiellophyceae Chlorophyta Viridiplantae
## 1837 Synechococcales unknown Cyanobacteria unknown
## 1169 uc_Eukaryota uc_Eukaryota Chlorophyta Viridiplantae
## 1183 Phaeocystales unknown Haptophyta unknown
## 1419 unknown Opitutae Planctomycetes unknown
## 1542 Planctomycetales Planctomycetia Planctomycetes unknown
## 1375 Planctomycetales Planctomycetia Planctomycetes unknown
## 1504 Planctomycetales Planctomycetia Planctomycetes unknown
## 730 Planctomycetales Planctomycetia Planctomycetes unknown
## 1290 Planctomycetales Planctomycetia Planctomycetes unknown
## 1633 Hyphomicrobiales Alphaproteobacteria Proteobacteria unknown
## 650 Hyphomicrobiales Alphaproteobacteria Proteobacteria unknown
## 822 uc_Archaea uc_Archaea uc_Archaea uc_Archaea
## 100 Planctomycetales Planctomycetia Planctomycetes unknown
## 250 unknown unknown Proteobacteria unknown
## Domain_gtdb Phylum_gtdb Class_gtdb Order_gtdb
## 964 Bacteria Pseudomonadota Alphaproteobacteria Rhodobacterales
## 1804 Bacteria Pseudomonadota Alphaproteobacteria Rhodobacterales
## 362 Bacteria Actinomycetota Actinomycetes Propionibacteriales
## 298 Bacteria Pseudomonadota Alphaproteobacteria Rhodobacterales
## 88 Bacteria Bacteroidota Bacteroidia Chitinophagales
## 836 Bacteria Pseudomonadota Gammaproteobacteria Pseudomonadales
## 544 Bacteria Bacteroidota Bacteroidia Cytophagales
## 618 Bacteria Actinomycetota Actinomycetes Actinomycetales
## 1081 Bacteria Actinomycetota Actinomycetes Streptomycetales
## 588 Bacteria Bacteroidota Bacteroidia Chitinophagales
## 1837 Bacteria Cyanobacteriota Cyanobacteriia PCC-6307
## 1169 Bacteria Actinomycetota Actinomycetes Mycobacteriales
## 1183 Bacteria Actinomycetota Actinomycetes Streptomycetales
## 1419 Bacteria Planctomycetota Planctomycetia Pirellulales
## 1542 Bacteria Planctomycetota Planctomycetia Pirellulales
## 1375 Bacteria Planctomycetota Planctomycetia Pirellulales
## 1504 Bacteria Planctomycetota Planctomycetia Pirellulales
## 730 Bacteria Planctomycetota Planctomycetia Pirellulales
## 1290 Bacteria Planctomycetota Planctomycetia Pirellulales
## 1633 Bacteria Pseudomonadota Alphaproteobacteria MED-G09
## 650 Bacteria Pseudomonadota Alphaproteobacteria MED-G09
## 822 Archaea Thermoplasmatota Poseidoniia MGIII
## 100 Bacteria Planctomycetota Planctomycetia Pirellulales
## 250 Bacteria Pseudomonadota Alphaproteobacteria Nitrososphaerales
## Family_gtdb Genus_gtdb Species_gtdb
## 964 Rhodobacteraceae Yoonia Yoonia sp028822885
## 1804 Rhodobacteraceae Planktomarina Planktomarina sp905182195
## 362 Nocardioidaceae Nocardioides Nocardioides antri
## 298 Rhodobacteraceae Planktomarina Planktomarina sp905182195
## 88 Saprospiraceae JALQTV01 JALQTV01 sp023268675
## 836 Methylophagaceae GCA-002733105 GCA-002733105 sp905181885
## 544 Cyclobacteriaceae UBA4465 UBA4465 sp943788235
## 618 Microbacteriaceae Naasia Naasia aerilata
## 1081 Streptomycetaceae Streptomyces Streptomyces sp007096885
## 588 Saprospiraceae JALQTV01 JALQTV01 sp023268675
## 1837 Cyanobiaceae Parasynechococcus Parasynechococcus sp003210375
## 1169 Pseudomonadaceae Pseudomonas_E Pseudomonas_E sp009861525
## 1183 Streptomycetaceae Streptomyces Streptomyces antioxidans
## 1419 UBA1268 UBA1268 UBA1268 sp014238855
## 1542 UBA1268 UBA1268 UBA1268 sp014238855
## 1375 UBA1268 UBA1268 UBA1268 sp016776925
## 1504 UBA1268 UBA1268 UBA1268 sp002862165
## 730 UBA1268 UBA1268 UBA1268 sp014238855
## 1290 UBA1268 UBA1268 UBA1268 sp002862165
## 1633 MED-G09 MED-G09 MED-G09 sp028339975
## 650 MED-G09 MED-G09 MED-G09 sp028339975
## 822 CG-Epi1 CG-Epi1 CG-Epi1 sp014240055
## 100 UBA1268 UBA1268 UBA1268 sp002862165
## 250 Nitrosopumilaceae Nitrosopelagicus Nitrosopumilus sp028448965
ggplot() + geom_hline(yintercept = 0, linetype='dotted') +
geom_vline(xintercept = 0, linetype='dotted') +
labs(x = paste0("RDA1 (",
round(100 * scores$Eigenval.rel[1], 1), "%)"),
y = paste0("RDA2 (",
round(100 * scores$Eigenval.rel[2], 1), "%)")) +
theme(plot.title=element_text(hjust=0.5)) +
geom_segment(data=as.data.frame(AGCScore),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.5,color = 'indianred2',arrow = arrow(length = unit(0.2,"cm")), alpha=0.25) +
geom_point(aes(samples[,1], samples[,2], col=samples$Ni_60_58_D_DELTA_BOTTLE), size = 5, alpha = 0.9) +
geom_text(aes(samples[,1], samples[,2], label=samples$TM_station_number), col="white", size=3) +
geom_segment(data=as.data.frame(enviscore),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.8, linetype="F1",color = 'forestgreen',arrow = arrow(length = unit(0.2,"cm"))) +
geom_text(aes(enviscore[,1]*1.05, enviscore[,2]*1.05, label=rownames(enviscore)), color="forestgreen") +
geom_text(aes(ST8AGC[,1]*1.05, ST8AGC[,2]*1.05, label=rownames(ST8AGC)), color="orange2") +
geom_segment(data=as.data.frame(ST8AGC),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.5,color = 'orange2',arrow = arrow(length = unit(0.2,"cm")), alpha=0.8) +
geom_text(aes(DNiAGC[,1]*1.05, DNiAGC[,2]*1.05, label=rownames(DNiAGC)), color="gold") +
geom_segment(data=as.data.frame(DNiAGC),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.5,color = 'gold',arrow = arrow(length = unit(0.2,"cm")), alpha=0.8) +
geom_text(aes(MertzAGC[,1]*1.05, MertzAGC[,2]*1.05, label=rownames(MertzAGC)), color="brown") +
geom_segment(data=as.data.frame(MertzAGC),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.5,color = 'brown',arrow = arrow(length = unit(0.2,"cm")), alpha=0.8) +
labs(col = expression(delta^60 * Ni ~ "(‰)")) +
theme_bw()
#. II - Attached
meta_nickel_ATT_rda = select(meta_nickel_ATT,c("ACE_seq_name","Ni_60_58_D_DELTA_BOTTLE","Ni_D_CONC_BOTTLE"))
row.names(meta_nickel_ATT_rda)=meta_nickel_ATT_rda$ACE_seq_name
meta_nickel_ATT_rda=select(meta_nickel_ATT_rda,-c("ACE_seq_name"))
meta_nickel_ATT_rda=data.frame(scale(meta_nickel_ATT_rda))
# Get rid of the AGC present in only 1 sample
GeneMat_Nickel_ATT <- GeneMat_Nickel_ATT[,-which(colSums(GeneMat_Nickel_ATT>0)<2)]
# IF USING non transformed data, need to transform the abundances to deal with compositionality
#GeneMat_Nickel_ATT <- decostand(GeneMat_Nickel_ATT, method="rclr")
#Match row names
GeneMat_Nickel_ATT_rda=GeneMat_Nickel_ATT[match(row.names(meta_nickel_ATT_rda),row.names(GeneMat_Nickel_ATT)),]
RDA=rda(GeneMat_Nickel_ATT_rda ~ ., data = meta_nickel_ATT_rda)
#summary(RDA)
RsquareAdj(RDA) #28.7%
## $r.squared
## [1] 0.3705166
##
## $adj.r.squared
## [1] 0.2865855
anova.cca(RDA) #Significant
## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: rda(formula = GeneMat_Nickel_ATT_rda ~ Ni_60_58_D_DELTA_BOTTLE + Ni_D_CONC_BOTTLE, data = meta_nickel_ATT_rda)
## Df Variance F Pr(>F)
## Model 2 43.297 4.4145 0.001 ***
## Residual 15 73.558
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova.cca(RDA, by="axis")
## Permutation test for rda under reduced model
## Forward tests for axes
## Permutation: free
## Number of permutations: 999
##
## Model: rda(formula = GeneMat_Nickel_ATT_rda ~ Ni_60_58_D_DELTA_BOTTLE + Ni_D_CONC_BOTTLE, data = meta_nickel_ATT_rda)
## Df Variance F Pr(>F)
## RDA1 1 33.716 6.8753 0.001 ***
## RDA2 1 9.581 1.9538 0.023 *
## Residual 15 73.558
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
scores=scoresRDA(RDA)
##
## -----------------------------------------------------------------------
## Site constraints (lc) selected. To obtain site scores that are weighted
## sums of species scores (default in vegan), argument site.sc must be set
## to wa.
## -----------------------------------------------------------------------
samples = scores$Sites
samples = as.data.frame(samples)
samples = merge(samples,meta_nickel_ATT, by.x="row.names", by.y="ACE_seq_name")
row.names(samples)=samples[,1]
samples=samples[,-1]
# Retrieve AGC scores
AGCScore <- scores$Species
AGCScore = as.data.frame(AGCScore)
# Keep only scores away from center
AGCScore_select = AGCScore[which(AGCScore$RDA1>0.5 | AGCScore$RDA1<(-0.5) | AGCScore$RDA2>0.5 | AGCScore$RDA2<(-0.5)),]
enviscore=scores$Biplot
#Plot the triplot
ggplot() + geom_hline(yintercept = 0, linetype='dotted') +
geom_vline(xintercept = 0, linetype='dotted') +
labs(x = paste0("RDA1 (",
round(100 * scores$Eigenval.rel[1], 1), "%)"),
y = paste0("RDA2 (",
round(100 * scores$Eigenval.rel[2], 1), "%)")) +
theme(plot.title=element_text(hjust=0.5)) +
geom_segment(data=as.data.frame(AGCScore),aes(xend = RDA1, yend = RDA2),x=0,y=0,size = 0.5,color = 'indianred2',arrow = arrow(length = unit(0.2,"cm")), alpha=0.25) +
geom_point(aes(samples[,1], samples[,2], col=samples$Ni_60_58_D_DELTA_BOTTLE), size = 5, alpha = 0.9) +
geom_text(aes(samples[,1], samples[,2], label=samples$TM_station_number), col="white", size=3) +
geom_segment(data=as.data.frame(enviscore),aes(xend = RDA1, yend = RDA2),x=0,y=0,size = 0.5, linetype="F1",color = 'orange',arrow = arrow(length = unit(0.2,"cm"))) +
geom_text(aes(enviscore[,1]*1.05, enviscore[,2]*1.05, label=rownames(enviscore)), color="orange") +
geom_text(aes(AGCScore_select[,1]*1.05, AGCScore_select[,2]*1.05, label=rownames(AGCScore_select)), color="indianred2") +
labs(col = "DeltaNickel60-58") +
geom_segment(data=as.data.frame(AGCScore_select),aes(xend = RDA1, yend = RDA2),x=0,y=0,size = 0.5,color = 'indianred2',arrow = arrow(length = unit(0.2,"cm"))) +
theme_bw()
# A pack of AGC is associated to station 11
MertzAGC <- AGCScore[which(AGCScore$RDA1>0.25 & AGCScore$RDA2<(-0.5)),]
MertzAGC <- MertzAGC[order(MertzAGC$RDA2, decreasing = F),]
MertzAGCannot <- AGC_Taxo_Annot[which(AGC_Taxo_Annot$AGC_ID %in% row.names(MertzAGC)),]
MertzAGCannot <- MertzAGCannot[match(row.names(MertzAGC), MertzAGCannot$AGC_ID),]
MertzGenes <- Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID %in% row.names(MertzAGC)),]
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_1819471"),]
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_2646602"),]
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_2848081"),]
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_22886549"),]
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_5722614"),]
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_23334844"),]
#summary(Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_2848081"),])
# One is correlated to dNi yet in between station 11 and 8
DNiAGC <- AGCScore[which(AGCScore$RDA1<0.25 & AGCScore$RDA1>(-0.25) & AGCScore$RDA2<(-0.75)),]
DNiAGCannot <- AGC_Taxo_Annot[which(AGC_Taxo_Annot$AGC_ID %in% row.names(DNiAGC)),]
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_24852953"),]
# 19 are associated to station 8
ST8AGC <- AGCScore[which(AGCScore$RDA1<(-0.25) & AGCScore$RDA2<(-0.5)),]
ST8AGC <- ST8AGC[order(ST8AGC$RDA2, decreasing = F),]
ST8AGCannot <- AGC_Taxo_Annot[which(AGC_Taxo_Annot$AGC_ID %in% row.names(ST8AGC)),]
ST8AGCannot <- ST8AGCannot[match(row.names(ST8AGC), ST8AGCannot$AGC_ID),]
ggplot() + geom_hline(yintercept = 0, linetype='dotted') +
geom_vline(xintercept = 0, linetype='dotted') +
labs(x = paste0("RDA1 (",
round(100 * scores$Eigenval.rel[1], 1), "%)"),
y = paste0("RDA2 (",
round(100 * scores$Eigenval.rel[2], 1), "%)")) +
theme(plot.title=element_text(hjust=0.5)) +
geom_segment(data=as.data.frame(AGCScore),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.5,color = 'indianred2',arrow = arrow(length = unit(0.2,"cm")), alpha=0.25) +
geom_point(aes(samples[,1], samples[,2], col=samples$Ni_60_58_D_DELTA_BOTTLE), size = 5, alpha = 0.9) +
geom_text(aes(samples[,1], samples[,2], label=samples$TM_station_number), col="white", size=3) +
geom_segment(data=as.data.frame(enviscore),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.8, linetype="F1",color = 'forestgreen',arrow = arrow(length = unit(0.2,"cm"))) +
geom_text(aes(enviscore[,1]*1.05, enviscore[,2]*1.05, label=rownames(enviscore)), color="forestgreen") +
geom_text(aes(ST8AGC[,1]*1.05, ST8AGC[,2]*1.05, label=rownames(ST8AGC)), color="orange2") +
geom_segment(data=as.data.frame(ST8AGC),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.5,color = 'orange2',arrow = arrow(length = unit(0.2,"cm"))) +
geom_text(aes(DNiAGC[,1]*1.05, DNiAGC[,2]*1.05, label=rownames(DNiAGC)), color="gold") +
geom_segment(data=as.data.frame(DNiAGC),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.5,color = 'gold',arrow = arrow(length = unit(0.2,"cm"))) +
geom_text(aes(MertzAGC[,1]*1.05, MertzAGC[,2]*1.05, label=rownames(MertzAGC)), color="brown") +
geom_segment(data=as.data.frame(MertzAGC),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.5,color = 'brown',arrow = arrow(length = unit(0.2,"cm"))) +
labs(col = expression(delta^60 * Ni ~ "(‰)")) +
theme_bw()